from filetypes.base import *
import malcat
import struct
import io

# Property identifiers of the 7z header stream. The names are listed in ID
# order, so a name's position in the tuple is its numeric property ID.
PROP_ID2NAME = dict(enumerate((
    "kEnd",                    # 0x00
    "kHeader",                 # 0x01
    "kArchiveProperties",      # 0x02
    "kAdditionalStreamsInfo",  # 0x03
    "kMainStreamsInfo",        # 0x04
    "kFilesInfo",              # 0x05
    "kPackInfo",               # 0x06
    "kUnPackInfo",             # 0x07
    "kSubStreamsInfo",         # 0x08
    "kSize",                   # 0x09
    "kCRC",                    # 0x0A
    "kFolder",                 # 0x0B
    "kCodersUnPackSize",       # 0x0C
    "kNumUnPackStream",        # 0x0D
    "kEmptyStream",            # 0x0E
    "kEmptyFile",              # 0x0F
    "kAnti",                   # 0x10
    "kName",                   # 0x11
    "kCTime",                  # 0x12
    "kATime",                  # 0x13
    "kMTime",                  # 0x14
    "kWinAttributes",          # 0x15
    "kComment",                # 0x16
    "kEncodedHeader",          # 0x17
    "kStartPos",               # 0x18
    "kDummy",                  # 0x19
)))

# Reverse lookup: property name -> numeric ID.
PROP_NAME2ID = {prop_name: prop_id for prop_id, prop_name in PROP_ID2NAME.items()}

class VarU64(Struct):
    """Variable-length unsigned 64-bit integer.

    The number of leading 1-bits of the first byte gives the number of extra
    bytes that follow (0 to 8). The remaining low bits of the first byte hold
    the most significant bits of the value; the extra bytes hold the rest in
    little-endian order.
    """

    def parse(self):
        """Yield one ``Encoded`` byte field covering the whole encoded number."""
        first_byte, = struct.unpack("<B", self.look_ahead(1))
        extra_bytes = 0
        mask = 0x80
        # Count the leading 1-bits. A first byte of 0xFF is a valid encoding:
        # it announces 8 extra bytes carrying the full 64-bit value, so it
        # must not be rejected.
        while extra_bytes < 8 and (first_byte & mask):
            extra_bytes += 1
            mask >>= 1
        yield Bytes(1 + extra_bytes, name="Encoded")

    @staticmethod
    def decode(var):
        """Return the integer stored in the ``Encoded`` field of ``var``."""
        tab = var["Encoded"]
        extra_bytes = len(tab) - 1
        # High bits: what is left of the first byte after the length prefix.
        value = (tab[0] & (0x7F >> extra_bytes)) << (8 * extra_bytes)
        # Low bits: the extra bytes, least significant first.
        for i in range(1, len(tab)):
            value += tab[i] << (8 * (i - 1))
        return value

class SignatureHeader(Struct):
    """Leading header: signature bytes, version, start-header CRC, then NextHeader."""

    def parse(self):
        yield Bytes(6, name="Signature")
        for part in ("Major", "Minor"):
            yield UInt8(
                name=part + "Version",
                comment="7z version ({})".format(part.lower()),
            )
        start_header_crc = yield UInt32(name="StartHeaderCRC")
        # A zero checksum is treated as an invalid header.
        if start_header_crc == 0:
            raise FatalError("empty crc")
        yield NextHeader()


class NextHeader(Struct):
    """Offset, size and CRC of the next header."""

    def parse(self):
        for field_name in ("NextHeaderOffset", "NextHeaderSize"):
            yield UInt64(name=field_name)
        checksum = yield UInt32(name="NextHeaderCRC")
        # A zero checksum is treated as an invalid header.
        if checksum == 0:
            raise FatalError("empty crc")


class PackInfo(Struct):
    """Pack-info section: pack position and stream count, followed by
    size / CRC sub-sections until the end marker."""

    def parse(self):
        prop_id = yield UInt8(name="PropertyID", values=list(PROP_NAME2ID.items()))
        if prop_id != PROP_NAME2ID["kPackInfo"]:
            raise FatalError("Invalid property ID {} for PackInfo".format(prop_id))
        yield PrefixedVarUInt64(name="PackPos")
        stream_count = yield PrefixedVarUInt64(name="NumPackStreams")

        # Sub-sections that carry one entry per pack stream.
        per_stream_sections = {
            PROP_NAME2ID["kSize"]: SizeInfo,
            PROP_NAME2ID["kCRC"]: CrcInfo,
        }
        while self.remaining():
            (next_id,) = struct.unpack("<B", self.look_ahead(1))
            if next_id == PROP_NAME2ID["kEnd"]:
                yield EndInfo()
                break
            section = per_stream_sections.get(next_id)
            if section is None:
                raise FatalError("Invalid property ID at {:x}: {}".format(self.offset, next_id))
            yield section(count=stream_count)


class FolderInfo(Struct):
    """Folder list: either an external data offset or the inline Folder records."""

    def parse(self):
        prop_id = yield UInt8(name="PropertyID", values=list(PROP_NAME2ID.items()))
        if prop_id != PROP_NAME2ID["kFolder"]:
            raise FatalError("Invalid property ID {} for FolderInfo".format(prop_id))
        folder_count = yield PrefixedVarUInt64(name="NumFolders")
        stored_externally = yield UInt8(name="External")
        if stored_externally:
            # Folder records are not inline; only their location is given.
            yield PrefixedVarUInt64(name="FolderDataOffset", comment="offset to folder data within data stream")
            return
        for index in range(folder_count):
            yield Folder(name="Folder[{}]".format(index))

class Folder(Struct):
    """One folder: its coders, the bind pairs linking them, and packed-stream indexes."""

    def parse(self):
        coder_count = yield PrefixedVarUInt64(name="NumCoders")
        coders = []
        for index in range(coder_count):
            parsed = yield Coder(name="Coder[{}]".format(index))
            coders.append(parsed)

        # Total the in/out streams; a coder without explicit stream counts
        # contributes exactly one of each.
        total_out = 0
        total_in = 0
        for coder in coders:
            if "NumOutStreams" in coder:
                total_out += coder["NumOutStreams"]
                total_in += coder["NumInStreams"]
            else:
                total_in += 1
                total_out += 1

        bind_pair_count = max(total_out - 1, 0)
        for index in range(bind_pair_count):
            yield BindPairsInfo(name="BindPair[{}]".format(index))

        packed_stream_count = total_in - total_out + 1
        # Indexes are only stored when there is more than one packed stream.
        if packed_stream_count > 1:
            for index in range(packed_stream_count):
                yield PrefixedVarUInt64(name="Index[{}]".format(index))


class Coder(Struct):
    """One coder record: flags byte, optional codec id, stream counts and properties."""

    def parse(self):
        flags = yield UInt8(name="Flags")
        codec_id_size = flags & 0xF      # low nibble: length of the codec id
        has_stream_counts = flags & 0x10
        has_properties = flags & 0x20

        if codec_id_size:
            yield Bytes(codec_id_size, name="CodecId")
        if has_stream_counts:
            for field_name in ("NumInStreams", "NumOutStreams"):
                yield PrefixedVarUInt64(name=field_name)
        if has_properties:
            properties_size = yield PrefixedVarUInt64(name="PropertiesSize")
            yield Bytes(properties_size, name="Properties")


class BindPairsInfo(Struct):
    """A bind pair: an input stream index followed by an output stream index."""

    def parse(self):
        for field_name in ("InIndex", "OutIndex"):
            yield PrefixedVarUInt64(name=field_name)


class UnpackInfo(Struct):
    """Unpack-info section: folder list, per-coder unpack sizes, optional folder CRCs."""

    def parse(self):
        prop_id = yield UInt8(name="PropertyID", values=list(PROP_NAME2ID.items()))
        if prop_id != PROP_NAME2ID["kUnPackInfo"]:
            raise FatalError("Invalid property ID {} for UnpackInfo".format(prop_id))

        folders = yield FolderInfo()
        folder_count = folders["NumFolders"]
        if folder_count:
            prop_id = yield UInt8(name="PropertyID", values=list(PROP_NAME2ID.items()))
            if prop_id != PROP_NAME2ID["kCodersUnPackSize"]:
                raise FatalError("Invalid property ID {} for SizeInfo".format(prop_id))
            for folder_idx in range(folder_count):
                folder = folders["Folder[{}]".format(folder_idx)]
                # One unpack size per output stream of the folder; a coder
                # without an explicit count has a single output stream.
                out_stream_count = 0
                for coder_idx in range(folder["NumCoders"]):
                    coder = folder["Coder[{}]".format(coder_idx)]
                    if "NumOutStreams" in coder:
                        out_stream_count += coder["NumOutStreams"]
                    else:
                        out_stream_count += 1
                for stream_idx in range(out_stream_count):
                    yield PrefixedVarUInt64(name="UnpackSize[{:d}][{:d}]".format(folder_idx, stream_idx))

        while self.remaining():
            (next_id,) = struct.unpack("<B", self.look_ahead(1))
            if next_id == PROP_NAME2ID["kEnd"]:
                yield EndInfo()
                break
            if next_id != PROP_NAME2ID["kCRC"]:
                raise FatalError("Invalid property ID at {:x}: {}".format(self.offset, next_id))
            yield CrcInfo(count=folder_count)


class EndInfo(Struct):
    """Single-byte end marker that terminates a property section."""

    def parse(self):
        expected_id = PROP_NAME2ID["kEnd"]
        marker = yield UInt8(name="PropertyID", values=list(PROP_NAME2ID.items()))
        if marker != expected_id:
            raise FatalError("Invalid property ID {} for EndInfo".format(marker))


class SizeInfo(Struct):
    """A kSize marker followed by ``count`` variable-length sizes."""

    def __init__(self, count, **kwargs):
        Struct.__init__(self, **kwargs)
        # Number of size entries to read.
        self.count = count

    def parse(self):
        prop_id = yield UInt8(name="PropertyID", values=list(PROP_NAME2ID.items()))
        if prop_id != PROP_NAME2ID["kSize"]:
            raise FatalError("Invalid property ID {} for SizeInfo".format(prop_id))
        for index in range(self.count):
            yield PrefixedVarUInt64(name="Size[{:d}]".format(index))


class SubStreamInfo(Struct):
    """Sub-streams section: optional per-folder stream counts, optional
    sub-stream sizes, optional CRCs, then the end marker."""

    def __init__(self, num_folders, **kwargs):
        Struct.__init__(self, **kwargs)
        # Number of folders declared by the preceding unpack-info section.
        self.num_folders = num_folders

    def parse(self):
        yield UInt8(name="PropertyID", values=list(PROP_NAME2ID.items()))
        pid, = struct.unpack("<B", self.look_ahead(1))

        # Every folder holds one sub-stream unless stated otherwise.
        unpack_stream_folders = [1] * self.num_folders
        if pid == PROP_NAME2ID["kNumUnPackStream"]:
            ussi = yield UnpackStreamSizeInfo(self.num_folders)
            # Entry 0 of the parsed struct is its PropertyID; the per-folder
            # counts follow. NOTE(review): assumes ``ussi.count`` is the
            # number of parsed fields -- confirm against the framework.
            for i in range(ussi.count - 1):
                unpack_stream_folders[i] = ussi[i + 1]
            pid, = struct.unpack("<B", self.look_ahead(1))

        if pid == PROP_NAME2ID["kSize"]:
            # A single kSize marker precedes the sizes of ALL folders, so one
            # SizeInfo must cover them all. The last sub-stream of each
            # folder has no stored size, hence ``n - 1`` per folder.
            num_sizes = sum(n - 1 for n in unpack_stream_folders if n > 1)
            yield SizeInfo(count=num_sizes)
            pid, = struct.unpack("<B", self.look_ahead(1))

        # The CRC sub-section is optional: only parse it when its marker is
        # actually next, instead of failing on archives that omit it.
        if pid == PROP_NAME2ID["kCRC"]:
            # NOTE(review): counts one digest per sub-stream, as the original
            # code did; streams whose CRC is already known at folder level
            # may need to be excluded -- confirm against the format spec.
            yield CrcInfo(count=sum(unpack_stream_folders))
        yield EndInfo()


class UnpackStreamSizeInfo(Struct):
    """A property ID followed by one stream count per folder."""

    def __init__(self, num_folders, **kwargs):
        Struct.__init__(self, **kwargs)
        # Number of per-folder counts to read.
        self.num_folders = num_folders

    def parse(self):
        yield UInt8(name="PropertyID", values=list(PROP_NAME2ID.items()))
        for folder_idx in range(self.num_folders):
            yield PrefixedVarUInt64(name="NumUnpackStream[{:d}]".format(folder_idx))



class CrcInfo(Struct):
    """A kCRC marker, an "all defined" flag (or a bit per entry), then the CRCs."""

    def __init__(self, **kwargs):
        # ``count`` is mandatory and must not be forwarded to Struct.
        self.count = kwargs.pop("count")
        Struct.__init__(self, **kwargs)

    def parse(self):
        prop_id = yield UInt8(name="PropertyID", values=list(PROP_NAME2ID.items()))
        if prop_id != PROP_NAME2ID["kCRC"]:
            raise FatalError("Invalid property ID {} for CrcInfo".format(prop_id))
        all_defined = yield UInt8(name="AllDefined")
        defined = None
        if not all_defined:
            # One presence bit per entry tells which CRCs are stored.
            presence_bits = [
                Bit(name="Defined[{:d}]".format(index))
                for index in range(self.count)
            ]
            defined = yield BitsField(*presence_bits, name="DefinedArray")
        for index in range(self.count):
            if not (all_defined or defined[index]):
                continue
            yield UInt32(name="Crc[{:d}]".format(index), comment="digest for nth stream")


class StreamInfo(Struct):
    """Streams-info section: pack info, unpack info and sub-streams info until the end marker."""

    def parse(self):
        yield UInt8(name="PropertyID", values=list(PROP_NAME2ID.items()))
        # Updated once the unpack-info section has been parsed; the
        # sub-streams section needs it.
        folder_count = 0
        while self.remaining():
            (next_id,) = struct.unpack("<B", self.look_ahead(1))
            if next_id == PROP_NAME2ID["kEnd"]:
                yield EndInfo()
                break
            if next_id == PROP_NAME2ID["kPackInfo"]:
                yield PackInfo()
            elif next_id == PROP_NAME2ID["kUnPackInfo"]:
                unpack_info = yield UnpackInfo()
                folder_count = unpack_info["FolderInfo"]["NumFolders"]
            elif next_id == PROP_NAME2ID["kSubStreamsInfo"]:
                yield SubStreamInfo(folder_count)
            else:
                raise FatalError("Invalid property ID at {:x}: {}".format(self.offset, next_id))



class FilesInfo(Struct):
    """Files-info section: the number of files followed by per-file
    properties (names, timestamps, ...) until the end marker."""

    def parse(self):
        val = yield UInt8(name="PropertyID", values=list(PROP_NAME2ID.items()))
        if val != PROP_NAME2ID["kFilesInfo"]:
            raise FatalError("Invalid property ID {} for FilesInfo".format(val))
        numfiles = yield PrefixedVarUInt64(name="NumberOfFiles")

        # Timestamp properties share one layout and differ only by name.
        time_names = {
            PROP_NAME2ID["kATime"]: "AccessTime",
            PROP_NAME2ID["kCTime"]: "CreationTime",
            PROP_NAME2ID["kMTime"]: "ModificationTime",
        }
        while self.remaining():
            pid, = struct.unpack("<B", self.look_ahead(1))
            if pid == PROP_NAME2ID["kEnd"]:
                yield EndInfo()
                break
            if pid == PROP_NAME2ID["kCRC"]:
                # The count previously referenced an undefined name
                # (``num_folders``) and raised NameError; the number of files
                # is the only count available in this section.
                # NOTE(review): confirm that a CRC list here really has the
                # CrcInfo layout rather than a size-prefixed property.
                yield CrcInfo(count=numfiles)
            elif pid == PROP_NAME2ID["kDummy"]:
                yield DummyProperty()
            elif pid == PROP_NAME2ID["kName"]:
                yield FilenameProperty(numfiles)
            elif pid in time_names:
                yield TimeProperty(numfiles, name=time_names[pid])
            else:
                # Any other property is skipped using its declared size.
                yield UnknownProperty()


class DummyProperty(Struct):
    """Dummy property: ID, size, then ``size`` unused bytes."""

    def parse(self):
        yield UInt8(name="PropertyID", values=list(PROP_NAME2ID.items()))
        padding_size = yield PrefixedVarUInt64(name="Size")
        yield Unused(padding_size)


class UnknownProperty(Struct):
    """Unhandled property: ID, size, then ``size`` bytes that are skipped."""

    def parse(self):
        yield UInt8(name="PropertyID", values=list(PROP_NAME2ID.items()))
        payload_size = yield PrefixedVarUInt64(name="Size")
        yield Unused(payload_size)


class FilenameProperty(Struct):
    """File names property: ID, size, external flag, then ``count`` UTF-16LE strings."""

    def __init__(self, count, **kwargs):
        Struct.__init__(self, **kwargs)
        # Number of file names to read.
        self.count = count

    def parse(self):
        yield UInt8(name="PropertyID", values=list(PROP_NAME2ID.items()))
        property_size = yield PrefixedVarUInt64(name="Size")
        stored_externally = yield UInt8(name="IsExternal")
        if stored_externally:
            # Names are not inline; nothing more to parse here.
            return
        for index in range(self.count):
            yield CStringUtf16le(name="Name[{}]".format(index), max_size=property_size)


class TimeProperty(Struct):
    """Timestamp property: ID, size, "all defined" flag (or a bit per file),
    external flag, then one file time per defined entry."""

    def __init__(self, count, **kwargs):
        Struct.__init__(self, **kwargs)
        # Number of files the property covers.
        self.count = count

    def parse(self):
        yield UInt8(name="PropertyID", values=list(PROP_NAME2ID.items()))
        yield PrefixedVarUInt64(name="Size")
        all_defined = yield UInt8(name="AllDefined")
        defined = [False] * self.count
        if not all_defined:
            # One presence bit per file tells which timestamps are stored.
            presence_bits = [
                Bit(name="Defined[{:d}]".format(index))
                for index in range(self.count)
            ]
            defined = yield BitsField(*presence_bits, name="DefinedArray")
        yield UInt8(name="IsExternal")
        for index in range(self.count):
            if not (all_defined or defined[index]):
                continue
            # Entries are named after this struct's own name
            # (presumably the ``name`` keyword given at construction).
            yield Filetime(name="{}[{:d}]".format(self.name, index), comment="file time")


class Header(Struct):
    """Plain header: main streams info and files info until the end marker."""

    def parse(self):
        yield UInt8(name="PropertyID", values=list(PROP_NAME2ID.items()))
        while self.remaining():
            (next_id,) = struct.unpack("<B", self.look_ahead(1))
            if next_id == PROP_NAME2ID["kEnd"]:
                yield EndInfo()
                break
            if next_id == PROP_NAME2ID["kMainStreamsInfo"]:
                yield StreamInfo(name="MainStreamInfo")
            elif next_id == PROP_NAME2ID["kFilesInfo"]:
                yield FilesInfo()
            else:
                raise FatalError("Invalid property ID at {:x}: {}".format(self.offset, next_id))
            


class SevenZipAnalyzer(FileTypeAnalyzer):
    """Analyzer for 7z archives.

    ``parse`` maps the signature header and the (plain or encoded) header
    structures, then uses the optional ``py7zr`` library to list the archive
    members. ``open`` uses ``py7zr`` to extract a single member.
    """
    category = malcat.FileType.ARCHIVE
    name = "7Z"
    regexp = r"7z\xbc\xaf\x27\x1c"

    def open(self, vfile, password=None):
        """Return the decompressed bytes of the archive member ``vfile``.

        When no ``password`` is given, the one recorded by ``parse`` is used.
        Raises ``InvalidPassword`` when py7zr reports that a password is
        required.
        """
        import py7zr
        buf = io.BytesIO(self.read(0, self.size()))
        if not password:
            # ``self.password`` only exists once parse() has run; fall back
            # to None instead of failing with AttributeError.
            password = getattr(self, "password", None)
        try:
            archive = py7zr.SevenZipFile(buf, password=password)
            return archive.read([vfile.path])[vfile.path].getvalue()
        except py7zr.exceptions.PasswordRequired:
            raise InvalidPassword()


    def parse(self, hint):
        """Parse the archive headers and register the contained files.

        Raises ``FatalError`` when the property found at the next-header
        offset is neither an encoded header nor a plain header.
        """
        self.password = None
        sig = yield SignatureHeader(category=Type.HEADER)
        body_offset = self.tell()
        # The next-header offset is relative to the end of the 32-byte
        # signature header.
        header_offset = 32 + sig["NextHeader"]["NextHeaderOffset"]
        self.jump(header_offset)
        pid, = struct.unpack("<B", self.read(self.tell(), 1))
        if pid == PROP_NAME2ID["kEncodedHeader"]:
            encoded = True
            si = yield StreamInfo(name="EncodedHeader")
        elif pid == PROP_NAME2ID["kHeader"]:
            encoded = False
            header = yield Header(name="Header")
            si = header["MainStreamInfo"]
        else:
            raise FatalError("Only encoded header supported for now, got {} at #{:x}".format(PROP_ID2NAME.get(pid, pid), (self.tell())))
        # py7zr is an optional dependency: without it only the structures
        # above are reported.
        try:
            import py7zr
            has_py7zr = True
        except ImportError:
            has_py7zr = False
        if has_py7zr:
            buf = io.BytesIO(self.read(0, self.size()))
            # Try without a password first, then a few common ones.
            pwdlist = [None, "infected", "virus", "malware"]
            for pwd in pwdlist:
                buf.seek(0)
                files = []
                try:
                    archive = py7zr.SevenZipFile(buf, password=pwd)
                    if pwd is None and archive.needs_password():
                        continue
                    for f in archive.list():
                        if not f.is_directory:
                            files.append((f.filename, f.uncompressed, "open"))
                    archive.testzip()
                    # Only register files once the archive was read without
                    # raising under this password.
                    for fname, size, unp in files:
                        self.add_file(fname, size, unp)
                    self.password = pwd
                    break
                except py7zr.exceptions.PasswordRequired:
                    pass
                except py7zr.exceptions.Bad7zFile:
                    pass
                except Exception as e:
                    # Best effort: report the error and try the next
                    # password. KeyboardInterrupt / SystemExit are
                    # deliberately not caught so they still propagate.
                    print(e)
            else:
                print("Could not open 7zip archive using any of the standard password")
        else:
            print("Install py7zr library to extract 7z files")
        self.confirm()

        # Disabled: mapping of the data area located between the signature
        # header and the next header.
        if False:
            nps =  si["PackInfo"]["NumPackStreams"]
            body_size = si["PackInfo"]["PackPos"]
            self.jump(body_offset)
            if body_size:
                yield Bytes(body_size, name="Data", category=Type.DATA)
            packed_size = 0
            for i in range(nps):
                packed_size += si["PackInfo"]["SizeInfo"]["Size[{:d}]".format(i)]
            if packed_size:
                yield Bytes(packed_size, name="FileInfo", category=Type.META)
            if self.tell() < header_offset:
                yield Bytes(header_offset - self.tell(), name="Overlay", category=Type.ANOMALY)

