from filetypes.base import *
import malcat
import string
import codecs
import re
import datetime
import struct
from filetypes.OfficeVbaDir import VBADirAnalyzer


# from oletools
# Mapping from codepages to Python codecs, when 'cpXXX' does not work
# (inspired from http://stackoverflow.com/questions/1592925/decoding-mac-os-text-in-python)
# Keys are numeric code page identifiers, values are Python codec names.
# Code pages that are not listed here are tried as 'cp<number>' by codepage2codec().
CODEPAGE_TO_CODEC = {
    37: 'cp037',
    708: 'arabic', # not found: Arabic (ASMO 708) => arabic = iso-8859-6
    709: 'arabic', # not found: Arabic (ASMO-449+, BCON V4) => arabic = iso-8859-6
    710: 'arabic', # not found: Arabic - Transparent Arabic => arabic = iso-8859-6
    870: 'latin2', # IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2
    1047: 'latin1', # IBM EBCDIC Latin 1/Open System
    1141: 'cp273', # IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro)
    1200: 'utf_16_le', # Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications
    1201: 'utf_16_be', # Unicode UTF-16, big endian byte order; available only to managed applications

    10000: 'mac-roman',
    10001: 'shiftjis',  # not found: 'mac-shift-jis',
    10002: 'big5',      # not found: 'mac-big5',
    10003: 'ascii',     # nothing appropriate found: 'mac-hangul',
    10004: 'mac-arabic',
    10005: 'hebrew',    # not found: 'mac-hebrew',
    10006: 'mac-greek',
    #10007: 'ascii',     # nothing appropriate found: 'mac-russian',
    10007: 'mac_cyrillic',  # guess (from xlrd)
    10008: 'gb2312',    # not found: 'mac-gb2312',
    10021: 'thai',      # not found: 'mac-thai',
    #10029: 'maccentraleurope',  # not found: 'mac-east europe',
    10029: 'mac_latin2',  # guess (from xlrd)
    10079: 'mac_iceland',  # guess (from xlrd)
    10081: 'mac-turkish',

    12000: 'utf_32_le', # Unicode UTF-32, little endian byte order
    12001: 'utf_32_be', # Unicode UTF-32, big endian byte order

    20127: 'ascii',

    28591: 'latin1',
    28592: 'iso8859_2',
    28593: 'iso8859_3',
    28594: 'iso8859_4',
    28595: 'iso8859_5',
    28596: 'iso8859_6',
    28597: 'iso8859_7',
    28598: 'iso8859_8',
    28599: 'iso8859_9',
    28603: 'iso8859_13',
    28605: 'iso8859_15',

    32768: 'mac_roman', # from xlrd
    32769: 'cp1252', # from xlrd
    38598: 'iso8859_8',

    65000: 'utf7',
    65001: 'utf8',
}



def codepage2codec(codepage):
    """Return the name of a Python codec usable for the given code page.

    The CODEPAGE_TO_CODEC table is consulted first; a code page that is not
    listed there is tried under the name ``cp<codepage>``. If Python has no
    codec registered under the resulting name, ``'utf8'`` is returned instead.
    """
    candidate = CODEPAGE_TO_CODEC.get(codepage, 'cp{}'.format(codepage))
    try:
        codecs.lookup(candidate)
    except LookupError:
        # unknown codec: fall back to UTF-8 rather than failing
        return 'utf8'
    return candidate

#eof oletools


# https://poi.apache.org/components/hpsf/internals.html

# Property table used for summary streams that are not flagged as "Document"
# (see Summary.parse and SummaryAnalyzer.parse).
# Each entry maps a property id to (short name, expected variant type, description).
# Within this file only the short name is consumed: it labels the property id
# enum and names the parsed property field.
BASE_PROPERTY = {
        0: ("DICTIONARY", "[Special format]", "Dictionary"),
        1: ("CODEPAGE", "VT_I2", "Code page"),
        2: ("TITLE", "VT_LPSTR", "Title"),
        3: ("SUBJECT",  "VT_LPSTR", "Subject"),
        4: ("AUTHOR",   "VT_LPSTR", "Author"),
        5: ("KEYWORDS", "VT_LPSTR", "Keywords"),
        6: ("COMMENTS", "VT_LPSTR", "Comments"),
        7: ("TEMPLATE", "VT_LPSTR", "Template"),
        8: ("LASTAUTHOR", "VT_LPSTR", "Last Saved By"),
        9: ("REVNUMBER",  "VT_LPSTR", "Revision Number"),
        10: ("EDITTIME",  "VT_FILETIME", "Total Editing Time"),
        11: ("LASTPRINTED",  "VT_FILETIME", "Last Printed"),
        12: ("CREATED", "VT_FILETIME", "Create Time/Date"),
        13: ("LASTSAVED", "VT_FILETIME", "Last Saved Time/Date"),
        14: ("PAGECOUNT", "VT_I4", "Number of Pages"),
        15: ("WORDCOUNT", "VT_I4", "Number of Words"),
        16: ("CHARCOUNT", "VT_I4", "Number of Characters"),
        17: ("THUMBNAIL", "VT_CF", "Thumbnail"),
        18: ("APPNAME", "VT_LPSTR", "Name of Creating Application"),
        19: ("SECURITY",  "VT_I4", "Security"),
        }


# Property table used for summary streams flagged as "Document"
# (Summary(is_doc=True), or a SummaryAnalyzer hint containing "Document").
# Each entry maps a property id to (short name, expected variant type, description).
# Within this file only the short name is consumed: it labels the property id
# enum and names the parsed property field.
DOCUMENT_PROPERTY = {
        0: ("DICTIONARY", "[Special format]", "Dictionary"),
        1: ("CODEPAGE", "VT_I2", "Code page"),
        2: ("CATEGORY", "VT_LPSTR", "Category"),
        3: ("PRESFORMAT", "VT_LPSTR", "PresentationTarget"),
        4: ("BYTECOUNT",  "VT_I4", "Bytes"),
        5: ("LINECOUNT",  "VT_I4", "Lines"),
        6: ("PARCOUNT", "VT_I4", "Paragraphs"),
        7: ("SLIDECOUNT", "VT_I4", "Slides"),
        8: ("NOTECOUNT",  "VT_I4", "Notes"),
        9: ("HIDDENCOUNT",   "VT_I4", "HiddenSlides"),
        10: ("MMCLIPCOUNT",  "VT_I4", "MMClips"),
        11: ("SCALE",   "VT_BOOL", "ScaleCrop"),
        12: ("HEADINGPAIR",  "VT_VARIANT | VT_VECTOR", "HeadingPairs"),
        13: ("DOCPARTS",  "VT_LPSTR | VT_VECTOR", "TitlesofParts"),
        14: ("MANAGER", "VT_LPSTR", "Manager"),
        15: ("COMPANY", "VT_LPSTR", "Company"),
        16: ("LINKSDIRTY",   "VT_BOOL", "LinksUpTo Date"),
        17: ("ADDITIONAL_INFO1",   "VT_LPSTR", "???"),
        18: ("ADDITIONAL_INFO2",   "VT_LPSTR", "???"),
        19: ("ADDITIONAL_INFO3",   "VT_LPSTR", "???"),
        20: ("ADDITIONAL_INFO4",   "VT_LPSTR", "???"),
        21: ("ADDITIONAL_INFO5",   "VT_LPSTR", "???"),
        22: ("ADDITIONAL_INFO6",   "VT_LPSTR", "???"),
        23: ("ADDITIONAL_INFO7",   "VT_LPSTR", "???"),
        }



class SummaryInformationHeader(Struct):
    """Header of a summary stream, ending with the index of its sections."""

    def parse(self):
        os_names = [
            ("Win16", 0),
            ("Mac", 1),
            ("Win32", 2)
            ]
        yield UInt16(name="Endian", comment="feff for little endian")
        yield UInt16(name="Format", comment="always 0")
        yield UInt8(name="OperatingSystemVersionMajor", comment="OS major version")
        yield UInt8(name="OperatingSystemVersionMinor", comment="OS minor version")
        yield UInt16(name="OperatingSystem", comment="OS", values=os_names)
        yield GUID(name="Format")
        section_count = yield UInt32(name="NumSections")
        # section offsets are declared with this header's own offset as base
        index_entry = SummarySectionHeader(self.offset)
        yield Array(section_count, index_entry, name="Sections")


class SummarySectionHeader(Struct):
    """Entry of the section index: a GUID naming the section and its offset."""

    def __init__(self, base, *args, **kwargs):
        # base: value handed to Offset32 as the base of the "Offset" field;
        # remaining arguments are forwarded untouched to Struct
        Struct.__init__(self, *args, **kwargs)
        self.base = base

    def parse(self):
        yield GUID(name="Name")
        yield Offset32(name="Offset", base=self.base)

class SummaryPropertyHeader(Struct):
    """Entry of a section's property index: a property id and its offset."""

    def __init__(self, base, properties, *args, **kwargs):
        # base: value handed to Offset32 as the base of the "Offset" field
        # properties: (name, id) pairs used as enum labels for the "Type" field
        Struct.__init__(self, *args, **kwargs)
        self.properties = properties
        self.base = base
    
    def parse(self):
        yield UInt32(name="Type", values=self.properties)
        yield Offset32(name="Offset", base=self.base)


class SummaryProperty(Struct):
    """A typed property value: a 32-bit variant type followed by its payload.

    ``sz`` is the number of bytes available for the whole property (type
    field included). It is only used to size the "Unparsed" blob emitted when
    the variant type has no dedicated parser, and to bound nested VARIANTs.
    """

    # variant type id -> (label, field class used to parse the payload, or None
    # when no parser is available)
    # NOTE(review): BOOL is read as a single byte here — confirm the on-disk width
    VARIANTS = {
        0: ("EMPTY", None),
        1: ("NULL", None),
        2: ("I2", UInt16),
        3: ("I4", UInt32),
        4: ("R4", None),
        5: ("R8", None),
        6: ("CY", None),
        7: ("DATE", None),
        8: ("BSTR", None),
        9: ("DISPATCH", None),
        10: ("ERROR", None),
        11: ("BOOL", UInt8),
        12: ("VARIANT", None),
        13: ("UNKNOWN", None),
        14: ("DECIMAL", None),
        16: ("I1", Int8),
        17: ("UI1", UInt8),
        18: ("UI2", UInt16),
        19: ("UI4", UInt32),
        20: ("I8", Int64),
        21: ("UI8", UInt64),
        22: ("INT", None),
        23: ("UINT", None),
        24: ("VOID", None),
        25: ("HRESULT", None),
        26: ("PTR", None),
        27: ("SAFEARRAY", None),
        28: ("CARRAY", None),
        29: ("USERDEFINED", None),
        30: ("LPSTR", PascalBytes),
        31: ("LPWSTR", UnicodeString),
        64: ("FILETIME", Filetime),
        65: ("BLOB", None),
        66: ("STREAM", None),
        67: ("STORAGE", None),
        68: ("STREAMED_OBJECT", None),
        69: ("STORED_OBJECT", None),
        70: ("BLOB_OBJECT", None),
        71: ("CF", PascalBytes),
        72: ("CLSID", GUID),
        0x1000: ("VECTOR", None),
        0x2000: ("ARRAY", None),
        0x4000: ("BYREF", None),
    }

    def __init__(self, sz, *args, **kwargs):
        Struct.__init__(self, *args, **kwargs)
        self.size = sz

    def parse(self):
        variant_values = [(v[0], k) for k, v in SummaryProperty.VARIANTS.items()]
        typ = yield UInt32(name="Type", values=variant_values)
        base_type = typ & 0x7f
        parser = SummaryProperty.VARIANTS.get(base_type, (None, None))[1]
        if base_type == 12:
            # VARIANT: the payload is itself a typed property
            parser = lambda k=self.size-4, **kwargs: SummaryProperty(k, **kwargs)
        if typ & 0x1000:
            # VECTOR: element count followed by the elements
            arraysz = yield UInt32(name="Count")
            if parser is not None:
                for i in range(arraysz):
                    yield parser(name="Element[{:d}]".format(i))
                return
            # elements of an unsupported type cannot be walked one by one:
            # fall through and keep the rest of the property as a raw blob
        elif parser is not None:
            yield parser(name="Data")
            return
        remaining = self.size - len(self)
        if remaining > 0:
            yield Bytes(remaining, name="Unparsed", comment="unparsed data")
           


class SummarySection(Struct):
    """One section of a summary stream: sizes, property index, then the properties.

    ``properties`` maps property ids to tuples whose first item is the
    property's short name (see BASE_PROPERTY / DOCUMENT_PROPERTY).
    """

    def __init__(self, properties, *args, **kwargs):
        Struct.__init__(self, *args, **kwargs)
        self.properties_dict = properties

    def parse(self):
        section_size = yield UInt32(name="Size")
        prop_count = yield UInt32(name="NumProperties")
        id_labels = [(info[0], prop_id) for prop_id, info in self.properties_dict.items()]
        index = yield Array(prop_count, SummaryPropertyHeader(self.offset, id_labels), name="PropertiesIndex")
        # walk the properties in the order they are laid out, not in index order
        ordered = sorted(index, key=lambda entry: entry["Offset"])
        last = len(ordered) - 1
        for pos, entry in enumerate(ordered):
            gap = entry["Offset"] - len(self)
            if gap > 0:
                yield Unused(gap, name="Padding")
            # a property extends up to the next one, the last one up to the section end
            end = ordered[pos + 1]["Offset"] if pos < last else section_size
            info = self.properties_dict.get(entry["Type"], ("Unknown", None))
            yield SummaryProperty(end - len(self), name=info[0])
        trailing = section_size - len(self)
        if trailing > 0:
            yield Unused(trailing, name="Padding")


class Summary(Struct):
    """A complete summary stream: header followed by one SummarySection per index entry.

    ``is_doc`` selects DOCUMENT_PROPERTY (truthy) or BASE_PROPERTY (falsy) as
    the table used to name the properties.
    """

    def __init__(self, is_doc,  *args, **kwargs):
        Struct.__init__(self, *args, **kwargs)
        self.is_doc = is_doc

    def parse(self):
        header = yield SummaryInformationHeader(name="Header", category=Type.HEADER)
        table = DOCUMENT_PROPERTY if self.is_doc else BASE_PROPERTY
        for section in header["Sections"]:
            yield SummarySection(table, name=section["Name"], parent=header, category=Type.META)


    
class SummaryAnalyzer(FileTypeAnalyzer):
    """Parse a summary stream and collect its properties as strings.

    After parsing, ``summary_metadata`` maps the section index (as a string)
    to a dict of property name -> textual value.
    """
    category = malcat.FileType.DOCUMENT
    name = "Office.Summary"

    def _summary_value(self, data):
        # Turn the "Data" field of a property into something printable.
        # Values that cannot be converted are returned unchanged.
        if isinstance(data, malcat.FieldAccess):
            if "String" in data:
                return data["String"].replace("\x00", "").strip()
            if "Bytes" in data and data.size < 1024:
                raw = data["Bytes"]
                if self.codec is None:
                    # no code page seen yet: show the bytes as hex
                    text = "{{{}}}".format(raw.hex())
                else:
                    text = raw.decode(self.codec, errors="replace")
                return text.replace("\x00", "").strip()
            return data
        if isinstance(data, datetime.datetime):
            return data.strftime("%Y-%m-%d %H:%M:%S")
        return data

    def parse(self, hint):
        self.summary_metadata = {}
        self.codec = None
        stream_name = "".join(ch for ch in hint if ch in string.printable)
        hdr = yield SummaryInformationHeader(name="Header", category=Type.HEADER)
        props = DOCUMENT_PROPERTY if "Document" in stream_name else BASE_PROPERTY
        for seci, section in enumerate(hdr["Sections"]):
            meta = {}
            ss = yield SummarySection(props, name=section["Name"], parent=hdr, category=Type.META)
            # the first three fields are Size, NumProperties and PropertiesIndex
            for i in range(3, ss.count):
                item = ss.at(i)
                if not (isinstance(item, malcat.FieldAccess) and "Data" in item):
                    continue
                value = self._summary_value(item["Data"])
                if not value or isinstance(value, malcat.FieldAccess):
                    continue
                meta[item.name] = str(value)
                if item.name == "CODEPAGE":
                    # later byte strings are decoded with this code page
                    self.codec = codepage2codec(value)
            self.summary_metadata[str(seci)] = meta


######################################################################################

class CompObjHeader(Struct):
    """Header of a CompObj stream: 4 skipped bytes, a 32-bit version, 20 skipped bytes."""
    def parse(self):
        yield Unused(4)
        yield UInt32(name="Version")
        yield Unused(20)


class CompObjStream(Struct):
    """Body of a CompObj stream: ANSI user type / clipboard format, then their Unicode twins.

    Each clipboard format is either a length-prefixed string or, when the next
    32-bit value is 0xfffffffe or above, a marker followed by a format id.
    """

    def parse(self):
        clipboard_ids = (
            ("CF_BITMAP", 2),
            ("CF_METAFILEPICT", 3),
            ("CF_DIB", 8),
            ("CF_ENHMETAFILE", 14),
        )
        yield PascalString(name="AnsiUserType", comment="display name of the linked object or embedded object")
        # peek at the next dword to tell a string length from a marker
        ansi_marker = struct.unpack("<I", self.look_ahead(4))[0]
        if ansi_marker >= 0xfffffffe:
            yield UInt32(name="Marker")
            yield UInt32(name="ClipboardFormatEnum", values=list(clipboard_ids))
        else:
            yield PascalString(name="ClipboardFormat", comment="clipboard format as ansi string")
        yield PascalString(name="Reserved", comment="")
        yield UInt32(name="UnicodeMarker", comment="always 0x71B239F4")
        yield UnicodeString(name="UnicodeUserType", comment="display name of the linked object or embedded object")
        unicode_marker = struct.unpack("<I", self.look_ahead(4))[0]
        if unicode_marker >= 0xfffffffe:
            yield UInt32(name="Marker")
            yield UInt32(name="ClipboardFormatEnum", values=list(clipboard_ids))
        else:
            yield UnicodeString(name="UnicodeClipboardFormat", comment="clipboard format as unicode string")
        yield UnicodeString(name="Reserved2", comment="")




class CompObjAnalyzer(FileTypeAnalyzer):
    """Analyzer for CompObj streams: a CompObjHeader followed by a CompObjStream."""
    category = malcat.FileType.DOCUMENT
    name = "Office.CompObj"

    def parse(self, hint):
        # the type is confirmed up front, before any field has been parsed
        self.confirm()
        yield CompObjHeader(category=Type.HEADER)
        yield CompObjStream(category=Type.HEADER)

######################################################################################

class OleNativeStream(Struct):
    """Native data blob prefixed by its 32-bit size."""
    
    def parse(self):
        sz = yield UInt32(name="NativeDataSize", comment="this values is actually ignored by office when inside a stream")
        # never read past the available data, whatever the declared size says
        yield Bytes(min(sz, self.remaining()), name="NativeData")


class OleNativeAnalyzer(FileTypeAnalyzer):
    """Analyzer that parses its input as a single OleNativeStream."""
    category = malcat.FileType.DOCUMENT
    name = "Office.OleNative"

    def parse(self, hint):
        yield OleNativeStream(category=Type.DATA)


######################################################################################


class MonikerStream(Struct):
    """A serialized moniker: a class id followed by class-specific data.

    ``size`` is the number of bytes available to the stream; it is only used
    to size the raw "Data" blob emitted for unrecognized class ids.
    """

    def __init__(self, size, *args, **kwargs):
        Struct.__init__(self, *args, **kwargs)
        self.size = size

    def parse(self):
        clsid = yield GUID(name="Clsid", comment="implementation-specific object capable of processing the data contained in the StreamData field")
        if clsid == "00000303-0000-0000-C000-000000000046":
            yield FileMoniker()
            return
        if clsid == "00000304-0000-0000-C000-000000000046":
            yield ItemMoniker()
            return
        if clsid == "79EAC9E0-BAF9-11CE-8C82-00AA004BA90B":
            yield UrlMoniker()
            return
        if clsid == "00000305-0000-0000-C000-000000000046":
            # anti moniker
            yield UInt32(name="CountAnti")
            return
        if clsid == "00000309-0000-0000-C000-000000000046":
            # composite moniker: a count followed by that many nested monikers
            count = yield UInt32(name="NumMoniker")
            for index in range(count):
                yield MonikerStream(0, name="Item.{}".format(index))
            return
        # unknown class id: keep whatever is left as raw bytes
        leftover = self.size - len(self)
        if leftover > 0:
            yield Bytes(leftover, name="Data")


class ItemMoniker(Struct):
    """Item moniker: a delimiter and an item, each with an optional Unicode variant.

    Each part starts with a 32-bit size, followed by a null-terminated ANSI
    string. A Unicode string follows when the declared size is larger than the
    bytes consumed by the ANSI string.
    """

    def parse(self):
        sz = yield UInt32(name="DelimiterSize")
        start = len(self)
        yield CString(name="DelimiterAnsi")
        # NOTE(review): the Unicode parts are read as null-terminated strings;
        # confirm against the spec whether they are length-delimited instead
        if sz > len(self) - start:
            yield CStringUtf16le(name="DelimiterUnicode")
        sz = yield UInt32(name="ItemSize")
        start = len(self)
        yield CString(name="ItemAnsi")
        # same rule as for the delimiter: declared size exceeds the ANSI part
        if sz > len(self) - start:
            yield CStringUtf16le(name="ItemUnicode")


class FileMoniker(Struct):
    """File moniker: an ANSI path, fixed fields and an optional UTF-16 path."""

    def parse(self):
        yield UInt16(name="Anti", comment="specifies the number of parent directory indicators at the beginning of the ansiPath field")
        ansi_len = yield UInt32(name="AnsiLength")
        yield String(ansi_len, name="AnsiPath")
        yield UInt16(name="EndServer", comment="specifies the number of Unicode characters used to specify the server portion of the path if the path is a UNC path (including the leading \\\\). If the path is not a UNC path, this field MUST equal 0xFFFF")
        yield UInt16(name="Version", comment="specifies the version number of this file moniker serialization implementation. MUST equal 0xDEAD")
        yield Unused(20)
        unicode_total = yield UInt32(name="UnicodePathength")
        if unicode_total <= 0:
            # no Unicode part at all
            return
        unicode_bytes = yield UInt32(name="UnicodePathBytes")
        yield UInt16(name="KeyValue")
        # the byte count is converted to a count of 16-bit characters
        yield StringUtf16le(unicode_bytes // 2, zero_terminated=False)

class UrlMoniker(Struct):
    """URL moniker: a size, a UTF-16 URL and an optional 24-byte trailer (GUID, version, flags)."""

    def parse(self):
        declared = yield UInt32(name="Size")
        yield CStringUtf16le(name="Url")
        # the trailer is only parsed when the declared size leaves exactly
        # 24 bytes after the URL (the size field itself is not counted)
        if declared - (len(self) - 4) != 24:
            return
        yield GUID(name="SerialGuid", comment="implementation of the URL moniker serialization")
        yield UInt32(name="SerialVersion", comment="unsigned integer that specifies the version number of this implementation of the URL moniker serialization. This field MUST equal 0 if present")
        # (name, comment) of the 16 flag bits, in declaration order
        flag_bits = [
            ("createAllowRelative", "specifies that if the URI scheme is unspecified and not implicitly 'file,' a relative scheme is assumed during creation of the URI"),
            ("createAllowImplicitWildcardScheme", "specifies that if the URI scheme is unspecified and not implicitly 'file,' a wildcard scheme is assumed during creation of the URI"),
            ("createAllowImplicitFileScheme", "specifies that if the URI scheme is unspecified and the URI begins with a drive letter or a UNC path, a file scheme is assumed during creation of the URI"),
            ("createNoFrag", "specifies that if a URI query string is present, the URI fragment is not looked for during creation of the URI"),
            ("createNoCanonicalize", "specifies that the scheme, host, authority, path, and fragment will not be canonicalized during creation of the URI. This value MUST be 0 if createCanonicalize equals 1"),
            ("createCanonicalize", "specifies that the scheme, host, authority, path, and fragment will be canonicalized during creation of the URI. This value MUST be 0 if createNoCanonicalize equals 1"),
            ("createFileUseDosPath", "specifies that MS-DOS path compatibility mode will be used during creation of file URIs"),
            ("createDecodeExtraInfo", "specifies that percent encoding and percent decoding canonicalizations will be performed on the URI query and URI fragment during creation of the URI. This field takes precedence over the createNoCanonicalize field"),
            ("createNoDecodeExtraInfo", "specifies that percent encoding and percent decoding canonicalizations will not be performed on the URI query and URI fragment during creation of the URI. This field takes precedence over the createCanonicalize field. This value MUST be 0 if createDecodeExtraInfo equals 1"),
            ("createCrackUnknownSchemes", "specifies that hierarchical URIs with unrecognized URI schemes will be treated like hierarchical URIs during creation of the URI. This value MUST be 0 if createNoCrackUnknownSchemes equals 1"),
            ("createNoCrackUnknownSchemes", "specifies that hierarchical URIs with unrecognized URI schemes will be treated like opaque URIs during creation of the URI"),
            ("createPreProcessHtmlUri", "specifies that preprocessing will be performed on the URI to remove control characters and white space during creation of the URI"),
            ("createNoPreProcessHtmlUri", "specifies that preprocessing will not be performed on the URI to remove control characters and white space during creation of the URI"),
            ("createIESettings", "specifies that registry settings will be used to determine default URL parsing behavior during creation of the URI"),
            ("createNoIESettings", "specifies that registry settings will not be used to determine default URL parsing behavior during creation of the URI"),
            ("createNoEncodeForbiddenCharacters", "specifies that URI characters forbidden in [RFC3986] will not be percent-encoded during creation of the URI"),
        ]
        bits = [Bit(name=bit_name, comment=bit_comment) for bit_name, bit_comment in flag_bits]
        yield BitsField(*bits, NullBits(16), name="Flags")


class OleStream(Struct):
    """Ole stream: version, flags, a reserved moniker and, for links, the link description.

    Raises FatalError when the version field is not 0x02000001.
    """

    def parse(self):
        version = yield UInt32(name="Version")
        if version != 0x02000001:
            raise FatalError("invalid version")
        flags = yield BitsField(
            Bit(name="IsLink", comment="ole stream is for a link object"),
            NullBits(31),
            name="Flags"
        )
        yield UInt32(name="LinkUpdateOption", comment="contains an implementation-specific hint supplied by the application or by a higher-level protocol that creates the data structure")
        yield Unused(4)
        size_comment = "the size, in bytes, of the {} field plus the size of this field"
        slots = ("ReservedMonikerStream", "RelativeSourceMonikerStream", "AbsoluteSourceMonikerStream")
        for slot in slots:
            declared = yield UInt32(name=slot + "Size", comment=size_comment.format(slot))
            # the declared size includes the size field itself; smaller values
            # leave no moniker stream to parse
            if declared >= 20:
                yield MonikerStream(declared - 4, name=slot)
            if slot == "ReservedMonikerStream" and not flags["IsLink"]:
                # embedded objects stop after the reserved moniker
                return
        yield UInt32(name="ClsidIndicator", comment="must be 0xffffffff")
        yield GUID(name="Clsid", comment="object class GUID of the creating application")
        yield UnicodeString(name="ReservedDisplayName")
        yield Unused(4)
        yield Filetime(name="LocalUpdateTime", comment="time when the container application last updated the RemoteUpdateTime field")
        yield Filetime(name="LocalCheckUpdateTime", comment="time when the container application last checked the update time of the linked object")
        yield Filetime(name="RemoteUpdateTime", comment="time when the linked object was last updated")


class OleStreamAnalyzer(FileTypeAnalyzer):
    """Analyzer that parses its input as a single OleStream."""
    category = malcat.FileType.DOCUMENT
    name = "Office.OleStream"

    def parse(self, hint):
        yield OleStream()


######################################################################################

class ODT(Struct):
    """Object info record: a 16-bit flag field, a format id and an optional second flag field."""

    def parse(self):
        persist1 = BitsField(
            NullBits(1),
            Bit(name="DefHandler", comment="if this bit is 1, then the application MUST assume that this OLE object’s class identifier (CLSID) is {00020907-0000-0000-C000-000000000046}"),
            NullBits(2),
            Bit(name="Link", comment="object is a link"),
            NullBits(1),
            Bit(name="Icon", comment="whether this OLE object is being represented by an icon"),
            Bit(name="IsOle1", comment="whether this OLE object is only compatible with OLE 1. If this bit is zero, then the object is compatible with OLE 2"),
            Bit(name="Manual", comment="specifies whether the user has requested that this OLE object only be updated in response to a user action. If fManual is zero, then the user has requested that this OLE object update automatically. If fLink is zero, then fManual is undefined and MUST be ignored"),
            Bit(name="RecomposeOnResize", comment="specifies whether this OLE object has requested to be notified when it is resized by its container"),
            NullBits(2),
            Bit(name="OCX", comment="object is an OLE control"),
            Bit(name="Stream", comment="specifies whether this OLE control stores its data in a single stream instead of a storage"),
            NullBits(1),
            Bit(name="ViewObject", comment="specifies whether this OLE object supports the IViewObject interface"),
            name="ODTPersist1"
        )
        yield persist1
        format_names = [
            ("Rich Text Format", 1),
            ("Text Format", 2),
            ("Metafile", 3),
            ("Bitmap", 4),
            ("DIBitmap", 5),
            ("HTML", 10),
            ("Unicode Text", 20),
        ]
        yield UInt16(name="Format", values=format_names)
        if self.remaining() < 2:
            # the second flag field is only parsed when at least 2 bytes remain
            return
        yield BitsField(
            Bit(name="EMF", comment="specifies that the presentation of this OLE object in the document is in the Enhanced Metafile format"),
            NullBits(1),
            Bit(name="QueriedEMF", comment="specifies whether the application that saved this Word Binary file had queried this OLE object to determine whether it supported the Enhanced Metafile format"),
            Bit(name="StoredAsEMF", comment="specifies that this OLE object supports the Enhanced Metafile format"),
            NullBits(12),
            name="ODTPersist2"
        )

        

class OleObjectInfo(FileTypeAnalyzer):
    """Analyzer that parses its input as a single ODT structure."""
    category = malcat.FileType.DOCUMENT
    name = "Office.ObjInfo"

    def parse(self, hint):
        yield ODT()

######################################################################################


def parse_vba(self):
    """Locate the VBA project of a container and list its module streams.

    ``self`` must provide ``directory``, ``filesystem``, ``get_entry_path``,
    ``read_entry`` and ``unpack_buffer``. A VBA root is a storage whose path
    contains ``VBA/dir``, ``VBA/_VBA_PROJECT`` and ``PROJECT``; when several
    qualify, the last one found is used.

    The ``dir`` stream is decoded first to get the project information and the
    module list. Streams under the VBA storage that are not listed there are
    then scanned for compressed source code.

    Side effect: sets ``self.vba_codec`` when the project information is found.

    Returns a tuple ``(dirpath, vba_modules, meta)``:
      * ``dirpath``: path of the ``dir`` stream, or None when there is no VBA
        project or the ``dir`` stream could not be processed
      * ``vba_modules``: list of ``(stream path, source offset)`` tuples
      * ``meta``: project name, encoding and version when available
    """
    vba_root = None
    dirpath = None
    vba_modules = []
    meta = {}
    # find VBA root
    for i, entry in enumerate(self.directory):
        if entry["ObjectType"] not in (1, 5):
            continue
        path = "/".join(self.get_entry_path(i))
        if len(path) > 4 and path.endswith("/VBA"):
            path = path[:-4]    # handle <DETACHED>/VBA stuff or empty VBA dirs
        required = ("VBA/dir", "VBA/_VBA_PROJECT", "PROJECT")
        if all("/".join([path, subpath]) in self.filesystem for subpath in required):
            vba_root = path
    if vba_root is None:
        return dirpath, vba_modules, meta

    vbapath = "/".join([vba_root, "VBA"])
    dirpath = "/".join([vbapath, "dir"])
    try:
        data = self.unpack_buffer(self.read_entry(self.filesystem[dirpath]))
        fake_file = malcat.FileBuffer(data, "VBADIR")
        parser = VBADirAnalyzer()
        parser.run(fake_file)
        if "ProjectInformation" in parser:
            info = parser["ProjectInformation"]
            meta["Name"] = info["NameRecord"]["Name"]
            self.vba_codec = codepage2codec(info["CodePageRecord"]["CodePage"])
            meta["Encoding"] = self.vba_codec
            meta["Version"] = "{}.{}".format(info["VersionRecord"]["VersionMajor"], info["VersionRecord"]["VersionMinor"])
        if "Modules" in parser:
            modules = parser["Modules"]
            # NOTE(review): children 0-3 of "Modules" are skipped, presumably
            # header fields — confirm against VBADirAnalyzer
            for i in range(4, modules.count):
                module = modules[i]
                # prefer the Unicode stream name, fall back to the ANSI one
                if "ModuleStreamNameUnicodeRecord" in module:
                    module_name = module["ModuleStreamNameUnicodeRecord"]["Name"]
                else:
                    module_name = module["ModuleStreamNameRecord"]["Name"]
                module_path = "/".join([vba_root, "VBA", module_name])
                module_entry = self.filesystem.get(module_path, None)
                if module_entry is None:
                    print("No stream entry for module {}".format(module_path))
                else:
                    vba_modules.append((module_path, module["ModuleOffsetRecord"]["TextOffset"]))
    except BaseException:
        # NOTE(review): kept deliberately broad so that any failure in the dir
        # parser degrades to the scan below instead of aborting the analysis
        print("Error while scanning VBA project file at {}: switching to aggressive module scanning".format(dirpath))
        dirpath = None
        vba_modules = []

    # scan aggressively for VBA compressed code to counter VBA Purging
    known_vba_modules = {x[0] for x in vba_modules}
    for fp, entry in self.filesystem.items():
        try:
            if fp.startswith(vbapath) and fp not in known_vba_modules:
                data = self.read_entry(entry)
                # trick from olevba
                match = re.search(b'\\x00Attribut[^e]', data)
                if match is not None:
                    # the source is assumed to start 3 bytes before the match
                    vba_modules.append((fp, match.start() - 3))
        except BaseException as e:
            print(e)

    return dirpath, vba_modules, meta

def decompile_vba(self):
    """Return the decompressed source of every module in ``self.vba_modules`` as one string.

    Each module is introduced by a banner line with its stream path. When a
    module has no source offset, the stream is searched for the start of the
    compressed source. A failure on one module puts the traceback in the
    output in place of its source; the other modules are still processed.
    """
    if not self.vba_modules:
        return "'no VBA module found - nothing to decompile"
    chunks = []
    for module_path, text_offset in self.vba_modules:
        chunks.append("\n'=============================== {} ===============================\n".format(module_path))
        stream_content = self.read_entry(self.filesystem[module_path])
        if not text_offset:
            chunks.append("\n'/!\\ VBA p-code has been deleted, only source code is left (VBA purging)\n'Using heuristics to find start of source code ... \n\n")
            hit = re.search(b'\\x00Attribut[^e]', stream_content)
            if hit is not None:
                text_offset = hit.start() - 3
        try:
            unpacked = self.unpack_buffer(stream_content[text_offset:])
            decoded = unpacked.decode(self.vba_codec, errors="replace")
            # make embedded NUL characters visible instead of emitting them raw
            chunks.append(decoded.replace("\x00", "\\x00"))
            chunks.append("\n\n")
        except BaseException:
            import traceback
            chunks.append(traceback.format_exc())
    return "".join(chunks)
