from transforms.base import Transform, TransformError
import re

# Codec names used to annotate the `encoding` parameter of the transforms
# below; the first entry is the default value of that parameter.
STANDARD_CODECS = (
    "ascii",
    "utf-8",
    "utf-16le",
    "utf-16-be",
    "utf-32le",
    "utf-32be",
    "cp437",
    "cp850",
    "cp866",
    "cp1252",
    "latin1",
    "gb2312",
)

class HtmlUnquote(Transform):
    """
    Unquote URL percent-encoding (stuff like %2f)

    Every %XX sequence is replaced by the byte it encodes; all other bytes are passed through unchanged.
    """
    category = "text / scripts"
    name = "url unquote"
    icon = "wxART_HTML"

    def run(self, data:bytes):
        """
        Return <data> with its percent-escapes decoded.

        :param data: raw input bytes
        :return: the unquoted bytes
        """
        import urllib.parse
        # Work on the bytes directly: going through str (decode / unquote /
        # encode) raised on input that is not valid UTF-8 and replaced escapes
        # such as %ff by U+FFFD, i.e. it silently corrupted the decoded data.
        return bytes(urllib.parse.unquote_to_bytes(data))

class HtmlUnescape(Transform):
    """
    Replace HTML entities by the character they stand for, e.g. &quot; -> "
    """
    category = "text / scripts"
    name = "html unescape"
    icon = "wxART_HTML"

    def run(self, data:bytes):
        """
        Decode <data> as UTF-8, unescape its HTML entities and re-encode the result as UTF-8.
        """
        import html
        unescaped = html.unescape(data.decode("utf8"))
        return unescaped.encode("utf8")



class Rot13(Transform):
    """
    Apply rot13 (the same operation encodes and decodes)
    """
    category = "text / scripts"
    name = "rot13"
    icon = "wxART_REFRESH"

    def run(self, data:bytes, encoding:STANDARD_CODECS=STANDARD_CODECS[0]):
        """
        Decode <data> with <encoding>, rot13 the text and encode it back with <encoding>.
        """
        import codecs
        text = data.decode(encoding)
        rotated = codecs.encode(text, "rot-13")
        return rotated.encode(encoding)


class CaseUp(Transform):
    """
    Convert text to upper case
    """
    category = "text / scripts"
    name = "uppercase"
    icon = "wxART_GO_UP"

    def run(self, data:bytes, encoding:STANDARD_CODECS=STANDARD_CODECS[0]):
        """
        Decode <data> with <encoding>, upper-case it and encode it back with <encoding>.
        """
        return data.decode(encoding).upper().encode(encoding)


class CaseDown(Transform):
    """
    Convert text to lower case
    """
    category = "text / scripts"
    name = "lowercase"
    icon = "wxART_GO_DOWN"

    def run(self, data:bytes, encoding:STANDARD_CODECS=STANDARD_CODECS[0]):
        """
        Decode <data> with <encoding>, lower-case it and encode it back with <encoding>.
        """
        return data.decode(encoding).lower().encode(encoding)


class JsonIndent(Transform):
    """
    Pretty-print a json document, indenting each level by a fixed number of spaces
    """
    category = "text / scripts"
    name = "json indent"
    icon = "wxART_INDENT"

    def run(self, data:bytes, encoding:STANDARD_CODECS=STANDARD_CODECS[0], tab_size:int=4):
        """
        Parse <data> (decoded with <encoding>) as json and serialize it again with an
        indentation of <tab_size>; the result is encoded with <encoding>.
        """
        import json
        parsed = json.loads(data.decode(encoding))
        return json.dumps(parsed, indent=tab_size).encode(encoding)



class XmlIndent(Transform):
    """
    Beautify a XML or HTML object using a fixed tab size
    """
    category = "text / scripts"
    name = "xml/html indent"
    icon = "wxART_INDENT"

    def run(self, data:bytes, encoding:STANDARD_CODECS=STANDARD_CODECS[0], tab_size:int=4):
        """
        Parse <data> (decoded with <encoding>) with xml.dom.minidom and pretty-print it.

        :param data: raw document bytes
        :param encoding: codec used to decode the input and encode the output
        :param tab_size: number of spaces used for each indentation level
        :return: the indented document, lines joined with os.linesep
        """
        import os
        import xml.dom.minidom as md
        dom = md.parseString(data.decode(encoding))
        # toprettyxml() indents with a tab character unless told otherwise, so
        # tab_size has to be passed explicitly to take effect.
        pretty_xml = dom.toprettyxml(indent=" " * tab_size)
        # drop the empty / whitespace-only lines left in the pretty-printed output
        pretty_xml = os.linesep.join(s for s in pretty_xml.splitlines() if s.strip())
        return pretty_xml.encode(encoding)


class JavascriptBeautify(Transform):
    """
    Beautify a javascript script and try to deobfuscate it. Requires the library jsbeautifier
    """
    category = "text / scripts"
    name = "javascript beautify"
    icon = "wxART_DISASM"

    def run(self, data:bytes, encoding:STANDARD_CODECS=STANDARD_CODECS[0],
            tab_size:int=4,
            unescape_strings:bool=True,
            preserve_newlines:bool=True):
        """
        Run jsbeautifier over <data> (decoded with <encoding>) and return the result
        encoded with <encoding>. <tab_size>, <unescape_strings> and <preserve_newlines>
        are forwarded to the matching jsbeautifier options.

        Raises ValueError when jsbeautifier cannot be imported.
        """
        try:
            import jsbeautifier
        except ImportError:
            raise ValueError(
                "You need to install the library jsbeautifier first.\n"
                'Note for windows users: please make sur you check "User system python interpreter" in the options.'
            )
        options = jsbeautifier.default_options()
        options.indent_size = tab_size
        options.unescape_strings = unescape_strings
        options.preserve_newlines = preserve_newlines
        source = data.decode(encoding)
        return jsbeautifier.beautify(source, options).encode(encoding)


class Chr(Transform):
    """
    Replace occurrences of Chr(xxx) calls by their value: Chr(65) -> A
    if <quote> is true, character will be enclosed in quotation marks: Chr(65) -> "A"
    Calls whose argument is not a valid character code are left unchanged.
    """
    category = "text / scripts"
    name = "chr"
    icon = "wxART_UNDO_ALL"

    def run(self, data:bytes, quote:bool=False):
        """
        :param data: raw script bytes
        :param quote: wrap every substituted character in double quotes
        :return: <data> with the Chr(<decimal>) calls (case-insensitive) replaced by
                 the UTF-8 encoding of the corresponding character
        """
        def rep(m):
            try:
                c = chr(int(m.group(1))).encode("utf8")
            except (ValueError, OverflowError):
                # Number outside the Unicode range, or a surrogate that cannot be
                # encoded: keep the original call instead of aborting the transform.
                return m.group(0)
            if quote:
                c = b'"' + c + b'"'
            return c
        return re.sub(rb"chr\((\d+)\)", rep, data, flags=re.I)



class Calculate(Transform):
    """
    Replace occurrences of simple arithmetic integer operations (-+*/%) by their result: 5 + 3 * 5 will be replaced with 20.

    If keep_first_parenthesis is true, the first set of parenthesis will be kept, e.g. (5+7) will be replaced with (12). Useful when the operation is inside a function parameters list.

    Whitespace surrounding an expression is preserved; expressions that cannot be evaluated are left unchanged.
    """
    category = "text / scripts"
    name = "calculate"
    icon = "wxART_CALC"

    def run(self, data:bytes, keep_first_parenthesis:bool=True):
        """
        :param data: raw script bytes
        :param keep_first_parenthesis: re-add the parenthesis around the result when the
                                       expression was entirely enclosed in parenthesis
        :return: <data> with every evaluable expression replaced by its value
        """
        def rep(m):
            expr = m.group(1)
            # The pattern also captures the whitespace (newlines included) around
            # the expression: set it aside so it can be restored, otherwise the
            # replacement would glue neighbouring tokens and lines together.
            core = expr.strip()
            lead = expr[:len(expr) - len(expr.lstrip())]
            trail = expr[len(expr.rstrip()):]
            try:
                # NOTE(security): eval() on file content. The pattern below only lets
                # digits, whitespace, parenthesis and the operators + - * / % through,
                # so no name can be referenced; keep it that strict if it is changed.
                res = str(eval(core)).encode("ascii")
            except Exception:
                # syntax error, division by zero, ...: leave the text untouched
                return m.group(0)
            if keep_first_parenthesis and core.startswith(b"(") and core.endswith(b")"):
                res = b"(" + res + b")"
            return lead + res + trail
        return re.sub(rb"([(\s]*\d+\s*(?:[+*/%-][\s\d()]+)+[\s)]*)", rep, data)


class HexUnescape(Transform):
    """
    Replace occurrences of hexadecimal numbers (0x<number>, &H<number>) by their decimal value

    Only numbers of 2 to 8 hexadecimal digits are converted; longer ones are left unchanged.
    """
    category = "text / scripts"
    name = "hexa2dec"
    icon = "wxART_FIND_HEX"

    def run(self, data:bytes):
        """
        :param data: raw script bytes
        :return: <data> with the hexadecimal literals replaced by their decimal form
        """
        def rep(m):
            return str(int(m.group(1), 16)).encode("ascii")
        # The negative lookahead refuses to convert only the first 8 digits of a
        # longer number, which used to leave the remaining digits glued to the
        # decimal result. re.I also accepts the &h / 0X spellings of the prefix.
        return re.sub(rb"(?:\&H|0x)([a-fA-F0-9]{2,8})(?![a-fA-F0-9])", rep, data, flags=re.I)


class VbObfuscation(Transform):
    """
    Undo classical VB obfuscation: Chr(<char>) calls and <string> & <string> concatenations
    """
    category = "text / scripts"
    name = "visual basic"
    icon = "wxART_UNDO_ALL"

    def run(self, data:bytes):
        """
        Replace the Chr() calls by quoted characters, then remove every
        quote / & / quote sequence so that adjacent string literals are merged.
        """
        # Chr(65) -> "A" (quoted, so that it can take part in the merge below)
        unchr = Chr().run(data, True)
        # "foo" & "bar" -> "foobar"
        return re.sub(rb'"\s*&\s*"', b"", unchr, flags=re.I)


class EscapingObfuscation(Transform):
    """
    Remove classical character escaping obfuscation like \\uCCCC or \\xCC or &HCCC or \\CCC (octal) when it's in the ascii range by the actual character.
    """
    category = "text / scripts"
    name = "char unescape"
    icon = "wxART_HTML"

    def run(self, data:bytes):
        """
        :param data: raw script bytes
        :return: <data> where each escape whose value is in 0x20..0x7f, or is 7, 10 or 13,
                 has been replaced by the corresponding byte; other escapes are kept
        """
        def replace(m, base=16):
            num = int(m.group(1), base=base)
            if 0x20 <= num < 0x80 or num in (7, 10, 13):
                return bytes((num,))
            return m.group(0)

        for pattern, base in (
                (rb"\\u([0-9a-fA-F]{2,4})", 16),
                (rb"\\x([0-9a-fA-F]{2})", 16),
                # octal digits only: with [0-9], input such as \9 or \18 made
                # int(..., 8) raise ValueError and aborted the whole transform
                (rb"\\([0-7]{1,3})", 8),
                (rb"\&H([0-9a-fA-F]{2,4})", 16),
                ):
            data = re.sub(pattern, lambda m, base=base: replace(m, base), data, flags=re.I)
        return data


class BatchGlobalVariables(Transform):
    """
    Replace all occurrences of variables set via "Set <var>=<value>"

    The "Set" lines themselves are removed from the output.
    """
    category = "text / scripts"
    name = "batch global variables"
    icon = "wxART_UNDO_ALL"

    def run(self, data:bytes):
        """
        :param data: batch script, UTF-8 encoded
        :return: the script with the %var% references to already defined variables
                 expanded and the variable definitions removed, UTF-8 encoded
        """
        text = data.decode("utf-8")
        kept = []
        variables = {}
        var_re = re.compile(r"^\s*[sS][eE][tT]\s+(?:/[a]\s+)?([a-zA-Z0-9_]+)\s*=(.*?)\r*\n*$")
        use_re = re.compile(r"%(\w+)%")

        # A definition that references itself (e.g. "set a=%a%x") never reaches a
        # fixed point and grows on every pass, so the expansion has to be bounded
        # both in number of passes and in how much a line may grow.
        max_passes = 64
        max_growth = 1 << 20

        def replace(m):
            # unknown variables are left as they are
            return variables.get(m.group(1), m.group(0))

        for line in text.split("\n"):
            max_length = len(line) + max_growth
            # expand repeatedly: a value may itself contain references
            for _ in range(max_passes):
                expanded = use_re.sub(replace, line)
                if expanded == line or len(expanded) > max_length:
                    break
                line = expanded
            m = var_re.match(line)
            if m:
                variables[m.group(1)] = m.group(2)
            else:
                kept.append(line)

        return "\n".join(kept).encode("utf-8")
