New upstream version 3.3.8

parent cf360915
:[diStorm4}:
:[diStorm3}:
The ultimate disassembler library.
Copyright (c) 2003-2016, Gil Dabah
Copyright (c) 2003-2018, Gil Dabah
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. All advertising materials mentioning features or use of this software
must display the following acknowledgement:
This product includes software developed by Gil Dabah.
4. Neither the name of Gil Dabah nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Gil Dabah nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY Gil Dabah ''AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL Gil Dabah BE LIABLE FOR ANY
DISCLAIMED. IN NO EVENT SHALL GIL DABAH BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
......@@ -4,7 +4,7 @@ setup.py
include\distorm.h
include\mnemonics.h
python\distorm3\__init__.py
python\distorm3\sample.py
examples\python\sample.py
src\config.h
src\decoder.c
src\decoder.h
......@@ -23,3 +23,8 @@ src\textdefs.h
src\wstring.c
src\wstring.h
src\x86defs.h
make\win32\cdistorm.vcxproj
make\win32\cdistorm.vcxproj.filters
make\win32\distorm.sln
make\win32\resource.h
make\win32\Resource.rc
include COPYING setup.cfg setup.py
include make\win32\cdistorm.vcxproj make\win32\cdistorm.vcxproj.filters make\win32\distorm.sln make\win32\resource.h make\win32\Resource.rc
recursive-include src *.c *.h
recursive-include include *.c *.h
recursive-include . *.py
\ No newline at end of file
recursive-include . *.py
......@@ -9,4 +9,11 @@ diStorm3 is super lightweight (~45KB), ultra fast and easy to use (a single API)
"We benchmarked five popular open-source disassembly libraries and chose diStorm3, which had the best performance (and furthermore, has complete 64-bit support).", July 2014, Quoting David Williams-King in his Thesis about Binary Shuffling.
diStorm3.3.3 is now licensed under BSD!
\ No newline at end of file
diStorm3 is licensed under BSD!
Installing diStorm3 -
Clone repo locally and then 'python setup.py install' or alternatively: 'python -m pip install distorm3'.
For Windows, use the pre-built installers available at https://pypi.org/project/distorm3/3.3.8/#files.
RTFM, the wiki has plenty of info.
#
# disOps.py v 1.0.0
#
# Copyright (C) 2003-2012 Gil Dabah, http://ragestorm.net/distorm/
# Copyright (C) 2003-2018 Gil Dabah, http://ragestorm.net/distorm/
#
# disOps is a part of the diStorm project, but can be used for anything.
# The generated output is tightly coupled with diStorm data structures which can be found at instructions.h.
......@@ -42,98 +42,19 @@
# To maximize the usage of this DB, one should learn the documentation of diStorm regarding the InstFlag and Operands Types.
#
import re
import time
import functools
import x86sets
import x86db
from x86header import *
# Maps every mnemonic string to its offset inside the mnemonics string table.
mnemonicsIds = dict()
# Next free offset: the table begins with the "undefined" entry, which takes
# its own length plus two extra bytes (length prefix and terminating null).
idsCounter = 2 + len("undefined")

# Predicates of the SSE pseudo compare instructions, in encoding order.
SSECmpTypes = [
    "EQ", "LT", "LE", "UNORD",
    "NEQ", "NLT", "NLE", "ORD",
]

# AVX starts with the same eight predicates and appends twenty-four more.
AVXCmpTypes = SSECmpTypes + [
    "EQ_UQ", "NGE", "NGT", "FALSE",
    "NEQ_OQ", "GE", "GT", "TRUE",
    "EQ_OS", "LT_OQ", "LE_OQ", "UNORD_S",
    "NEQ_US", "NLT_UQ", "NLE_UQ", "ORD_S",
    "EQ_US", "NGE_UQ", "NGT_UQ", "FALSE_OS",
    "NEQ_OS", "GE_OQ", "GT_OQ", "TRUE_US",
]
# Support SSE pseudo compare instructions. We will have to add them manually.
def FixPseudo(mnems):
    """Expand an SSE pseudo compare mnemonic into one mnemonic per predicate.

    mnems[0] is used as the prefix and mnems[1] as the suffix wrapped around
    every entry of SSECmpTypes; the result follows the order of SSECmpTypes.
    """
    prefix, suffix = mnems[0], mnems[1]
    expanded = []
    for cmpType in SSECmpTypes:
        expanded.append(prefix + cmpType + suffix)
    return expanded
# Support AVX pseudo compare instructions. We will have to add them manually.
def FixPseudo2(mnems):
    """Expand an AVX pseudo compare mnemonic into one mnemonic per predicate.

    mnems[0] is used as the prefix and mnems[1] as the suffix wrapped around
    every entry of AVXCmpTypes; the result follows the order of AVXCmpTypes.
    """
    head, tail = mnems[0], mnems[1]
    names = []
    for predicate in AVXCmpTypes:
        names.append(head + predicate + tail)
    return names
def TranslateMnemonics(pseudoClassType, mnems):
    """Translate mnemonic strings into their offsets in the mnemonics table.

    pseudoClassType -- ISetClass.SSE / ISetClass.SSE2 / ISetClass.AVX when mnems
                       describes a pseudo compare instruction that has to be
                       expanded first; any other value leaves mnems untouched.
    mnems           -- list of mnemonic strings; an empty string marks an
                       unused slot.

    Returns a list of ids parallel to the (possibly expanded) mnemonics, with 0
    standing for an empty mnemonic. Mnemonics seen for the first time are
    registered in the global mnemonicsIds and advance the global idsCounter.

    Raises ValueError once the running offset no longer fits in a uint16_t.
    """
    global mnemonicsIds
    global idsCounter
    if pseudoClassType == ISetClass.SSE or pseudoClassType == ISetClass.SSE2:
        mnems = FixPseudo(mnems)
    elif pseudoClassType == ISetClass.AVX:
        mnems = FixPseudo2(mnems)
    ids = []
    for mnemonic in mnems:
        if len(mnemonic) == 0:
            # Some mnemonics are empty on purpose because they're not used.
            # Set them to zero to keep the order of the list.
            ids.append(0)  # Undefined instruction.
            continue
        # `in` works on both Python 2 and 3; dict.has_key() exists only on Python 2.
        if mnemonic in mnemonicsIds:
            ids.append(mnemonicsIds[mnemonic])
        else:
            mnemonicsIds[mnemonic] = idsCounter
            ids.append(idsCounter)
            idsCounter += len(mnemonic) + 2  # For len/null chars.
            if idsCounter >= 2**16:
                # A bare string cannot be raised; use a real exception so the
                # message actually reaches the user.
                raise ValueError("opcodeId is too big to fit into uint16_t")
    return ids
# All VIAL and diStorm3 code are based on the order of this list, do NOT edit!
# Register names, listed group by group. Two kinds of entries are not real registers:
#  - "XX" is a group separator: the register dump loop in this file starts a new
#    output line when it meets one and emits nothing for it.
#  - "" (empty string) is an unused slot: it is emitted with length 0 in the text
#    table and as an R_UNUSED%d placeholder in the register enum.
REGISTERS = [
"RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI", "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15", "XX",
"EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI", "R8D", "R9D", "R10D", "R11D", "R12D", "R13D", "R14D", "R15D", "XX",
"AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI", "R8W", "R9W", "R10W", "R11W", "R12W", "R13W", "R14W", "R15W", "XX",
"AL", "CL", "DL", "BL", "AH", "CH", "DH", "BH", "R8B", "R9B", "R10B", "R11B", "R12B", "R13B", "R14B", "R15B", "XX",
"SPL", "BPL", "SIL", "DIL", "XX",
"ES", "CS", "SS", "DS", "FS", "GS", "XX",
"RIP", "XX",
"ST0", "ST1", "ST2", "ST3", "ST4", "ST5", "ST6", "ST7", "XX",
"MM0", "MM1", "MM2", "MM3", "MM4", "MM5", "MM6", "MM7", "XX",
"XMM0", "XMM1", "XMM2", "XMM3", "XMM4", "XMM5", "XMM6", "XMM7", "XMM8", "XMM9", "XMM10", "XMM11", "XMM12", "XMM13", "XMM14", "XMM15", "XX",
"YMM0", "YMM1", "YMM2", "YMM3", "YMM4", "YMM5", "YMM6", "YMM7", "YMM8", "YMM9", "YMM10", "YMM11", "YMM12", "YMM13", "YMM14", "YMM15", "XX",
"CR0", "", "CR2", "CR3", "CR4", "", "", "", "CR8", "XX",
"DR0", "DR1", "DR2", "DR3", "", "", "DR6", "DR7"]
def CreatePythonDict(mnemonicsIds):
    """Create the opcodes dictionary for Python.

    mnemonicsIds -- mapping of mnemonic string to its numeric id.

    Writes 'python_output.txt' in the current directory. The file holds
    `0x<id>: "<mnemonic>", ` entries; once the current line reaches 76
    characters its trailing space is replaced by a newline.
    """
    s = "\n"
    for mnemonic, opcodeId in mnemonicsIds.items():
        s += "0x%x: \"%s\", " % (opcodeId, mnemonic)
        # Wrap long lines: swap the trailing space for a line break.
        if len(s) - s.rfind("\n") >= 76:
            s = s[:-1] + "\n"
    # Use a context manager so the handle is flushed and closed deterministically
    # instead of relying on garbage collection.
    with open("python_output.txt", "w") as f:
        f.write(s)
def CreateJavaDict(mnemonicsIds):
    """Create the opcodes dictionary/enum for Java.

    mnemonicsIds -- mapping of mnemonic string to its numeric id.

    Writes two files in the current directory:
      java_enums.txt     -- one `mOpcodes.put(0x<id>, OpcodeEnum.<NAME>);` line
                            per mnemonic, preceded by the UNDEFINED entry.
      java_mnemonics.txt -- the comma separated enum names, wrapped once a line
                            reaches 76 characters.
    Enum names are the mnemonics with spaces turned into underscores and
    commas removed.
    """
    def enumName(mnemonic):
        # Java identifiers cannot contain spaces or commas.
        return mnemonic.replace(" ", "_").replace(",", "")

    s = "\nmOpcodes.put(0, OpcodeEnum.UNDEFINED);\n"
    for mnemonic, opcodeId in mnemonicsIds.items():
        s += "mOpcodes.put(0x%x, OpcodeEnum.%s);\n" % (opcodeId, enumName(mnemonic))
    # Context managers close both output files deterministically.
    with open("java_enums.txt", "w") as f:
        f.write(s)

    s = "\nUNDEFINED, "
    for mnemonic in mnemonicsIds:
        s += "%s, " % enumName(mnemonic)
        # Wrap long lines: swap the trailing space for a line break.
        if len(s) - s.rfind("\n") >= 76:
            s = s[:-1] + "\n"
    with open("java_mnemonics.txt", "w") as f:
        f.write(s)
def DumpMnemonics():
global mnemonicsIds
import x86generator
f = open("defs.txt", "w")
# Work with multi line and dot-all.
reFlags = re.M | re.S
f.write("typedef enum {\n\tI_UNDEFINED = 0, ")
def CreateMnemonicsC(mnemonicsIds):
""" Create the opcodes arrays for C header files. """
opsEnum = "typedef enum {\n\tI_UNDEFINED = 0, "
pos = 0
l2 = sorted(mnemonicsIds.keys())
for i in l2:
......@@ -146,650 +67,149 @@ def DumpMnemonics():
pos = 0
elif i != l2[-1]:
s += " "
f.write(s)
f.write("\n} _InstructionType;\n\n")
regsText = "const _WRegister _REGISTERS[] = {\n\t"
regsEnum = "typedef enum {\n\t"
old = "*"
unused = 0
for i in REGISTERS:
if old != "*":
if old == "XX":
regsText += "\n\t"
regsEnum += "\n\t"
old = i
continue
else:
regsText += "{%d, \"%s\"}," % (len(old), old)
if len(old):
regsEnum += "R_%s," % old
else:
regsEnum += "R_UNUSED%d," % unused
unused += 1
if i != "XX":
regsText += " "
regsEnum += " "
old = i
regsText += "{%d, \"%s\"}\n};\n" % (len(old), old)
regsEnum += "R_" + old + "\n} _RegisterType;\n"
f.write(regsEnum + "\n")
opsEnum += s
opsEnum += "\n} _InstructionType;"
# Mnemonics are sorted by insertion order. (Pseudo mnemonics depend on this!)
s = "const unsigned char _MNEMONICS[] =\n\"\\x09\" \"UNDEFINED\\0\" "
l = zip(mnemonicsIds.keys(), mnemonicsIds.values())
l.sort(lambda x, y: x[1] - y[1])
# NOTE: EXTRA BACKSLASHES FOR RE.SUB !!!
s = "const unsigned char _MNEMONICS[] =\n\"\\\\x09\" \"UNDEFINED\\\\0\" "
l = list(zip(mnemonicsIds.keys(), mnemonicsIds.values()))
l = sorted(l, key=functools.cmp_to_key(lambda x, y: x[1] - y[1]))
for i in l:
s += "\"\\x%02x\" \"%s\\0\" " % (len(i[0]), i[0])
s += "\"\\\\x%02x\" \"%s\\\\0\" " % (len(i[0]), i[0])
if len(s) - s.rfind("\n") >= 76:
s += "\\\n"
s = s[:-1] + ";\n\n" # Ignore last space.
f.write(s)
f.write(regsText + "\n")
f.close()
# Used for Python/Java dictionary of opcodeIds-->mnemonics.
CreatePythonDict(mnemonicsIds)
CreateJavaDict(mnemonicsIds)
# Storage classes of an operand. Each value names the field an operand occupies;
# classes that share a value share the same field (DISP/MEM are both 3,
# IMM/PC/PTR are all 2), which is what CheckOTCollisions looks for.
O_NONE = 0
# REG standalone
O_REG = 1
# IMM standalone
O_IMM = 2
# IMM_1 standalone
O_IMM_1 = 4
# IMM_2 standalone
O_IMM_2 = 5
# DISP standalone
O_DISP = 3
# MEM uses DISP
O_MEM = 3
# PC uses IMM
O_PC = 2
# PTR uses IMM
O_PTR = 2
# Maps every OperandType to the storage class (O_* above) it occupies.
# NOTE(review): XMM_IMM maps to O_IMM while YXMM_IMM maps to O_REG - confirm
# that this asymmetry is intended.
_OPT2T = {OperandType.NONE : O_NONE,
OperandType.IMM8 : O_IMM,
OperandType.IMM16 : O_IMM,
OperandType.IMM_FULL : O_IMM,
OperandType.IMM32 : O_IMM,
OperandType.SEIMM8 : O_IMM,
OperandType.IMM16_1 : O_IMM_1,
OperandType.IMM8_1 : O_IMM_1,
OperandType.IMM8_2 : O_IMM_2,
OperandType.REG8 : O_REG,
OperandType.REG16 : O_REG,
OperandType.REG_FULL : O_REG,
OperandType.REG32 : O_REG,
OperandType.REG32_64 : O_REG,
OperandType.FREG32_64_RM : O_REG,
OperandType.RM8 : O_MEM,
OperandType.RM16 : O_MEM,
OperandType.RM_FULL : O_MEM,
OperandType.RM32_64 : O_MEM,
OperandType.RM16_32 : O_MEM,
OperandType.FPUM16 : O_MEM,
OperandType.FPUM32 : O_MEM,
OperandType.FPUM64 : O_MEM,
OperandType.FPUM80 : O_MEM,
OperandType.R32_M8 : O_MEM,
OperandType.R32_M16 : O_MEM,
OperandType.R32_64_M8 : O_MEM,
OperandType.R32_64_M16 : O_MEM,
OperandType.RFULL_M16 : O_MEM,
OperandType.CREG : O_REG,
OperandType.DREG : O_REG,
OperandType.SREG : O_REG,
OperandType.SEG : O_REG,
OperandType.ACC8 : O_REG,
OperandType.ACC16 : O_REG,
OperandType.ACC_FULL : O_REG,
OperandType.ACC_FULL_NOT64 : O_REG,
OperandType.MEM16_FULL : O_MEM,
OperandType.PTR16_FULL : O_PTR,
OperandType.MEM16_3264 : O_MEM,
OperandType.RELCB : O_PC,
OperandType.RELC_FULL : O_PC,
OperandType.MEM : O_MEM,
OperandType.MEM_OPT : O_MEM,
OperandType.MEM32 : O_MEM,
OperandType.MEM32_64 : O_MEM,
OperandType.MEM64 : O_MEM,
OperandType.MEM128 : O_MEM,
OperandType.MEM64_128 : O_MEM,
OperandType.MOFFS8 : O_MEM,
OperandType.MOFFS_FULL : O_MEM,
OperandType.CONST1 : O_IMM,
OperandType.REGCL : O_REG,
OperandType.IB_RB : O_REG,
OperandType.IB_R_FULL : O_REG,
OperandType.REGI_ESI : O_MEM,
OperandType.REGI_EDI : O_MEM,
OperandType.REGI_EBXAL : O_MEM,
OperandType.REGI_EAX : O_MEM,
OperandType.REGDX : O_REG,
OperandType.REGECX : O_REG,
OperandType.FPU_SI : O_REG,
OperandType.FPU_SSI : O_REG,
OperandType.FPU_SIS : O_REG,
OperandType.MM : O_REG,
OperandType.MM_RM : O_REG,
OperandType.MM32 : O_MEM,
OperandType.MM64 : O_MEM,
OperandType.XMM : O_REG,
OperandType.XMM_RM : O_REG,
OperandType.XMM16 : O_MEM,
OperandType.XMM32 : O_MEM,
OperandType.XMM64 : O_MEM,
OperandType.XMM128 : O_MEM,
OperandType.REGXMM0 : O_REG,
OperandType.RM32 : O_MEM,
OperandType.REG32_64_M8 : O_MEM,
OperandType.REG32_64_M16 : O_MEM,
OperandType.WREG32_64 : O_REG,
OperandType.WRM32_64 : O_REG,
OperandType.WXMM32_64 : O_MEM,
OperandType.VXMM : O_REG,
OperandType.XMM_IMM : O_IMM,
OperandType.YXMM : O_REG,
OperandType.YXMM_IMM : O_REG,
OperandType.YMM : O_REG,
OperandType.YMM256 : O_MEM,
OperandType.VYMM : O_REG,
OperandType.VYXMM : O_REG,
OperandType.YXMM64_256 : O_MEM,
OperandType.YXMM128_256 : O_MEM,
OperandType.LXMM64_128 : O_MEM,
OperandType.LMEM128_256 : O_MEM
}
def CheckOTCollisions(ii):
    """Warn when two or more operands of an instruction use the same diStorm3 structure field.

    E.G: ENTER 0x10, 0x1 --> This instruction uses two OT_IMM, which would
    collide and use the same field twice, which is bogus.

    ii -- instruction info; its operands are mapped through _OPT2T and its
          first mnemonic and tag are used in the warning text.

    Prints a single warning line for the first collision found; returns None.
    """
    # Build a real list: on Python 3 map() returns an iterator without count().
    types = [_OPT2T[operand] for operand in ii.operands]
    # Regs cannot cause a collision, since each register is stored inside the operand itself.
    for storageClass in types:
        if storageClass != O_REG and types.count(storageClass) > 1:
            # Single pre-formatted argument keeps the output identical whether
            # print is the Python 2 statement or the Python 3 function.
            print("%s %s" % ("**WARNING: Operand type collision for instruction: " + ii.mnemonics[0], ii.tag))
            break
# This function sets the flow-control type of certain flow control related instructions.
def UpdateForFlowControl(ii):
    """Set ii.flowControl according to the first mnemonic of the instruction.

    Mnemonics beginning with "CMOV" get FlowControl.CMOV, the mnemonics listed
    below get their associated FlowControl value, and everything else gets 0.
    """
    mnemonic = ii.mnemonics[0]
    if mnemonic.startswith("CMOV"):
        ii.flowControl = FlowControl.CMOV
        return

    # Should I include SYSCALL ?
    flowControlGroups = [
        (["INT", "INT1", "INT 3", "INTO", "UD2"], FlowControl.INT),
        (["CALL", "CALL FAR"], FlowControl.CALL),
        (["RET", "IRET", "RETF"], FlowControl.RET),
        (["SYSCALL", "SYSENTER", "SYSRET", "SYSEXIT"], FlowControl.SYS),
        (["JMP", "JMP FAR"], FlowControl.UNC_BRANCH),
        (["JCXZ", "JO", "JNO", "JB", "JAE", "JZ", "JNZ", "JBE", "JA", "JS", "JNS",
          "JP", "JNP", "JL", "JGE", "JLE", "JG", "LOOP", "LOOPZ", "LOOPNZ"], FlowControl.CND_BRANCH),
    ]

    # Default: not a flow control instruction.
    ii.flowControl = 0
    for names, kind in flowControlGroups:
        if mnemonic in names:
            ii.flowControl = kind
            return
def UpdateWritableDestinationOperand(ii):
    """Mark dst-wrt flag for all Integer instructions that write to GPR/mem.

    Sets InstFlag.DST_WR in ii.flags when the first mnemonic matches one of the
    tables below; FPU store instructions are only marked when their first
    operand is not an FPU STi register.
    """
    first = ii.mnemonics[0]

    # Whole families, recognised by the start of the mnemonic.
    if first.startswith(("MOV", "SET", "CMOV", "CMPXCHG")):
        ii.flags |= InstFlag.DST_WR
        return

    writers = [
        "ADD", "OR", "ADC", "SBB", "AND", "SUB", "XOR", "INC", "DEC", "LEA", "XCHG",
        "ROL", "ROR", "RCL", "RCR", "SHL", "SHR", "SAL", "SAR", "SHLD", "SHRD",
        "NEG", "NOT", "MUL", "IMUL", "DIV", "IDIV",
        "POP", "BTR", "BTS", "BTC", "XADD", "BSWAP",
        "LZCNT", "MOVBE", "POPCNT", "CRC32", "SMSW"
    ]
    # NOTE(review): this is a substring test (mnemonic contained in a table
    # entry), not equality, so e.g. "BT" matches through "BTR" and "IN" through
    # "INC". Kept as-is to preserve the generated tables - confirm the intent.
    if any(first in name for name in writers):
        ii.flags |= InstFlag.DST_WR
        return

    # Make sure it's an FPU instruction before we continue.
    if ii.classType != ISetClass.FPU:
        return

    fpuStores = [
        "FSTENV", "FSTCW", "FSAVE", "FSTSW", "FST", "FSTP", "FNSTENV", "FNSTCW",
        "FIST", "FISTP", "FNSAVE", "FBSTP", "FNSTSW"
    ]
    # Same substring matching as above.
    if not any(first in name for name in fpuStores):
        return
    # Ignore operands of FPU STi.
    stackOperands = [OperandType.FPU_SI, OperandType.FPU_SSI, OperandType.FPU_SIS]
    if len(ii.operands) > 0 and ii.operands[0] not in stackOperands:
        ii.flags |= InstFlag.DST_WR
def UpdatePrivilegedInstruction(opcodeIds, ii):
    """Flag privileged mnemonics by setting the most significant bit of their opcodeId.

    opcodeIds -- list of ids, indexed like ii.mnemonics; modified in place by
                 OR-ing 0x8000 into the entries that are privileged.
    ii        -- instruction info supplying the mnemonics and operands.

    An entry is privileged when its mnemonic is in the list below, or when the
    instruction is a MOV that has a control or debug register operand.
    """
    privileged = [
        "LGDT", "LLDT", "LTR", "LIDT", "LMSW", "CLTS", "INVD",
        "WBINVD", "INVLPG", "HLT", "RDMSR", "WRMSR", "RDPMC", "RDTSC",
        # IO Sensitive Instructions, mostly allowed by ring0 only.
        "IN", "INS", "OUT", "OUTS", "CLI", "STI", "IRET"
    ]
    for index, mnemonic in enumerate(ii.mnemonics):
        if mnemonic in privileged:
            opcodeIds[index] |= 0x8000
        # MOV with Debug/Control registers is privileged as well.
        elif ii.mnemonics[0] == "MOV" and (OperandType.CREG in ii.operands or OperandType.DREG in ii.operands):
            opcodeIds[index] |= 0x8000
def SetInstructionAffectedFlags(ii, flagsTuple):
    """Store the modified/tested/undefined flags on an instruction info.

    flagsTuple -- either a single value (taken as the modified flags) or a
                  tuple of up to three values in the order
                  (modified, tested, undefined); missing values default to 0.
    """
    triple = flagsTuple if isinstance(flagsTuple, tuple) else (flagsTuple,)
    # Pad with zeros so all three slots can be read unconditionally.
    triple = triple + (0,) * (3 - len(triple))
    ii.modifiedFlags, ii.testedFlags, ii.undefinedFlags = triple[0], triple[1], triple[2]
def GetTestedFlagsForCondition(cond):
    """Return the (modified, tested, undefined) flags triple for a condition suffix.

    cond -- the condition part of a mnemonic, e.g. "NZ" or "BE".

    Only the tested slot is filled in; the other two are 0.
    Raises KeyError for a suffix that is not in the table.
    """
    overflow, sign, zero = CPUFlags.OF, CPUFlags.SF, CPUFlags.ZF
    parity, carry = CPUFlags.PF, CPUFlags.CF
    carryOrZero = carry | zero
    signOrOverflow = sign | overflow
    signOverflowOrZero = sign | overflow | zero

    testedByCondition = {
        "O": overflow, "NO": overflow,
        "B": carry, "AE": carry,
        "Z": zero, "NZ": zero,
        "BE": carryOrZero, "A": carryOrZero,
        "S": sign, "NS": sign,
        "P": parity, "NP": parity,
        "L": signOrOverflow, "GE": signOrOverflow,
        "LE": signOverflowOrZero, "G": signOverflowOrZero,
        # Special for FCMOV
        "U": parity, "NU": parity,
        "E": zero, "NE": zero,
        "NB": carry,
        "NBE": carryOrZero,
    }
    # Return tested flags only.
    tested = testedByCondition[cond]
    return (0, tested, 0)
def UpdateInstructionAffectedFlags(ii):
    """Record the modified/tested/undefined CPU flags of an instruction.

    The flags are stored on ii through SetInstructionAffectedFlags. Lookup order:
      1. any mnemonic of ii found in the table below (skipped for pseudo opcodes),
      2. SETcc / CMOVcc / FCMOVcc, by the condition suffix of the first mnemonic,
      3. Jcc, by the condition suffix of the first mnemonic,
      4. shift/rotate instructions, whose flags depend on whether the second
         operand is the constant 1.
    If nothing matches, ii is left untouched.

    Note that some instructions reset specific flags, but we don't record that
    here, we only care about actually modified ones.

    Corrections relative to the previous table: the keys "COMSID"/"UCOMSID"
    were misspellings of COMISD/UCOMISD and could never match, and the "OR"
    entry listed AF as tested where AND/XOR/TEST list it as undefined.
    """
    # MNEM: MODIFIED, TEST, UNDEFINED.
    OF, SF, ZF, AF, PF, CF, IF, DF = CPUFlags.OF, CPUFlags.SF, CPUFlags.ZF, CPUFlags.AF, CPUFlags.PF, CPUFlags.CF, CPUFlags.IF, CPUFlags.DF
    InstByMnem = {
        "AAA": (AF | CF, AF, OF | SF | ZF | PF),
        "AAS": (AF | CF, AF, OF | SF | ZF | PF),
        "AAD": (SF | ZF | PF, 0, OF | AF | CF),
        "AAM": (SF | ZF | PF, 0, OF | AF | CF),
        "ADC": (OF | SF | ZF | AF | PF | CF, CF),
        "ADD": (OF | SF | ZF | AF | PF | CF),
        "AND": (OF | SF | ZF | PF | CF, 0, AF),
        "ARPL": (ZF),
        "BSF": (ZF, 0, OF | SF | ZF | AF | PF | CF),
        "BSR": (ZF, 0, OF | SF | ZF | AF | PF | CF),
        "BT": (CF, 0, OF | SF | ZF | AF | PF),
        "BTS": (CF, 0, OF | SF | ZF | AF | PF),
        "BTR": (CF, 0, OF | SF | ZF | AF | PF),
        "BTC": (CF, 0, OF | SF | ZF | AF | PF),
        "CLC": (CF),
        "CLD": (DF),
        "CLI": (IF),
        "CMC": (CF),
        "CMP": (OF | SF | ZF | AF | PF | CF),
        "CMPXCHG": (OF | SF | ZF | AF | PF | CF),
        "CMPXCHG8B": (ZF),
        "CMPXCHG16B": (ZF), # Same inst as previous.
        "COMISD": (ZF | PF | CF),
        "COMISS": (ZF | PF | CF),
        "DAA": (SF | ZF | AF | PF | CF, AF | CF, OF),
        "DAS": (SF | ZF | AF | PF | CF, AF | CF, OF),
        "DEC": (OF | SF | ZF | AF | PF),
        "DIV": (0, 0, OF | SF | ZF | AF | PF | CF),
        "FCOMI": (ZF | PF | CF),
        "FCOMIP": (ZF | PF | CF),
        "FUCOMI": (ZF | PF | CF),
        "FUCOMIP": (ZF | PF | CF),
        "IDIV": (0, 0, OF | SF | ZF | AF | PF | CF),
        "IMUL": (OF | CF, 0, SF | ZF | AF | PF),
        "INC": (OF | SF | ZF | AF | PF),
        "UCOMISD": (ZF | PF | CF),
        "UCOMISS": (ZF | PF | CF),
        "IRET": (OF | SF | ZF | AF | PF | CF | IF | DF),
        "LAR": (ZF),
        "LOOPZ": (0, ZF),
        "LOOPNZ": (0, ZF),
        "LSL": (ZF),
        "LZCNT": (ZF | CF, 0, OF | SF | AF | PF),
        "MUL": (OF | CF, 0, SF | ZF | AF | PF),
        "NEG": (OF | SF | ZF | AF | PF | CF),
        "OR": (SF | ZF | PF, 0, AF),
        "POPCNT": (ZF),
        "POPF": (OF | SF | ZF | AF | PF | CF | IF | DF),
        "RSM": (OF | SF | ZF | AF | PF | CF | IF | DF),
        "SAHF": (SF | ZF | AF | PF | CF),
        "SBB": (OF | SF | ZF | AF | PF | CF, CF),
        "STC": (CF),
        "STD": (DF),
        "STI": (IF),
        "SUB": (OF | SF | ZF | AF | PF | CF),
        "TEST": (SF | ZF | PF, 0, AF),
        "VERR": (ZF),
        "VERW": (ZF),
        "XADD": (OF | SF | ZF | AF | PF | CF),
        "XOR": (SF | ZF | PF, 0, AF),
        # IO/String instructions:
        "MOVS": (0, DF),
        "LODS": (0, DF),
        "STOS": (0, DF),
        "CMPS": (OF | SF | ZF | AF | PF | CF, DF),
        "SCAS": (OF | SF | ZF | AF | PF | CF, DF),
        "INS": (0, DF),
        "OUTS": (0, DF)
    }

    # Check for mnemonics in the above table.
    # (`in` works on both Python 2 and 3; dict.has_key() exists only on Python 2.)
    for mnemonic in ii.mnemonics:
        if mnemonic in InstByMnem and (ii.flags & InstFlag.PSEUDO_OPCODE) == 0:
            SetInstructionAffectedFlags(ii, InstByMnem[mnemonic])
            return

    first = ii.mnemonics[0]

    # Look carefully for SETcc, CMOVcc or FCMOVcc instructions.
    for stem in ["SET", "CMOV", "FCMOV"]:
        if first.find(stem) == 0:
            SetInstructionAffectedFlags(ii, GetTestedFlagsForCondition(first[len(stem):]))
            return

    # See if it's a Jcc instruction; the excluded two-letter prefixes are
    # J-mnemonics that are not handled as a plain condition suffix here.
    if first[:1] == "J" and first[:2] not in ["JM", "JC", "JE", "JR"]:
        SetInstructionAffectedFlags(ii, GetTestedFlagsForCondition(first[1:]))
        return

    # Still no match, try special shift/rotate instructions.
    # Special shift/rotate instruction that with constant 1 have different flag affections:
    # First tuple is with constant 1, second tuple is with any count (CL).
    Shifts = [
        (["RCL", "RCR"], (OF | CF, CF), (CF, CF, OF)),
        (["ROL", "ROR"], (OF | CF), (CF, 0, OF)),
        (["SAL", "SAR", "SHL", "SHR"], (OF | SF | ZF | PF | CF, 0, AF), (SF | ZF | PF | CF, 0, OF | AF)),
        (["SHLD", "SHRD"], (OF | SF | ZF | PF | CF, 0, AF), (SF | ZF | PF | CF, 0, OF | AF))
    ]
    for names, byOne, byCount in Shifts:
        if first in names:
            flags = byOne if ii.operands[1] == OperandType.CONST1 else byCount
            SetInstructionAffectedFlags(ii, flags)
            return

    # The instruction doesn't affect any flags...
    return
# Shared inst-info table: maps a shared-info tuple to the index it was assigned.
sharedInfoDict = dict()
# Shared flags table: maps a flags value to the index it was assigned.
flagsDict = dict()
def FormatInstruction(ii):
""" Formats a string with all information relevant for diStorm InstInfo structure
or the InstInfoEx. These are the internal structures diStorm uses for holding the instructions' information.
Using this structure diStorm knows how to format an opcode when it reads it from the stream.
s += "\\\\\n"
s = s[:-1] + ";" # Ignore last space.
# Return enum & mnemonics.
return (opsEnum, s)
An instruction information structure is found by its byte codes with a prefix of "II_".
So for example ADD EAX, Imm32 instruction is II_00.
Since there are several types of instructions information structures,
the tables which point to these non-default InstInfo structures, will have to cast the pointer. """
# Returns a 2-tuple: (the formatted C initializer text, True when the
# instruction has the EXTENDED flag set).
# There might be optional fields, if there's a 3rd operand or a second/third mnemonic.
optFields = ""
# Default type of structure is InstInfo.
# NOTE(review): `type` shadows the builtin and is never read again in this function.
type = "_InstInfo"
# Make sure the instruction can be fully represented using the diStorm3 _DecodeInst structure.
CheckOTCollisions(ii)
# Add flags for flow control instructions.
UpdateForFlowControl(ii)
# Add flags for writable destination operand.
UpdateWritableDestinationOperand(ii)
# Add affected modified/tested/undefined flags for instruction.
UpdateInstructionAffectedFlags(ii)
# Pad mnemonics to three, in case EXMNEMONIC/2 isn't used (so we don't get an exception).
# The [None, x][condition] idiom indexes the list with a bool: the class type
# is passed only for pseudo opcodes, otherwise None.
mnems = TranslateMnemonics([None, ii.classType][(ii.flags & InstFlag.PSEUDO_OPCODE) == InstFlag.PSEUDO_OPCODE], ii.mnemonics) + [0, 0]
# Mark whether the instruction is privileged, by setting MSB of the OpcodeId field.
UpdatePrivilegedInstruction(mnems, ii)
# Pad operands to atleast three (so we don't get an exception too, since there might be instructions with no operands at all).
ops = ii.operands + [OperandType.NONE, OperandType.NONE, OperandType.NONE, OperandType.NONE]
# Is it an extended structure?
if ii.flags & InstFlag.EXTENDED:
# Since there's a second and/or a third mnemonic, use the the InstInfoEx structure.
type = "_InstInfoEx"
flagsEx = 0
# Fix flagsEx to have the VEX flags, except PRE_VEX.
if ii.flags & InstFlag.PRE_VEX:
flagsEx = ii.flags >> InstFlag.FLAGS_EX_START_INDEX
# If there's a third operand, use it, otherwise NONE.
# (Same bool-indexing idiom: the operand is kept only when its USE_OPn flag is set.)
op3 = [OperandType.NONE, ops[2]][(ii.flags & InstFlag.USE_OP3) == InstFlag.USE_OP3]
op4 = [OperandType.NONE, ops[3]][(ii.flags & InstFlag.USE_OP4) == InstFlag.USE_OP4]
if flagsEx >= 256: # Assert the size of flagsEx is enough to holds this value.
# NOTE(review): raising a plain string is rejected by Python 2.6+ and 3
# with a TypeError; an exception instance is needed for the message to surface.
raise "FlagsEx exceeded its 8 bits. Change flagsEx of _InstInfoEx to be uint16!"
# Concat the mnemonics and the third operand.
optFields = ", 0x%x, %d, %d, %d, %d" % (flagsEx, op3, op4, mnems[1], mnems[2])
# Notice we filter out internal bits from flags.
flags = ii.flags & ((1 << InstFlag.FLAGS_EX_START_INDEX)-1)
# Allocate a slot for this flag if needed.
# NOTE(review): dict.has_key() exists only on Python 2; `flags not in flagsDict`
# is the portable spelling.
if not flagsDict.has_key(flags):
flagsDict[flags] = len(flagsDict)
# Get the flags-index.
flagsIndex = flagsDict[flags]
if flagsIndex >= 256:
# NOTE(review): string raise, see above.
raise "FlagsIndex exceeded its 8 bits. Change flags of _InstInfo to be uint16!"
# InstSharedInfo:
# The tuple lists the second operand (ops[1]) before the first (ops[0]).
sharedInfo = (flagsIndex, ops[1], ops[0], (ii.classType << 3) | ii.flowControl, ii.modifiedFlags, ii.testedFlags, ii.undefinedFlags)
# NOTE(review): has_key again, see above.
if not sharedInfoDict.has_key(sharedInfo):
sharedInfoDict[sharedInfo] = len(sharedInfoDict)
# Get the shared-info-index.
sharedInfoIndex = sharedInfoDict[sharedInfo]
if sharedInfoIndex >= 2**16:
# NOTE(review): string raise, see above.
raise "SharedInfoIndex exceeded its 16 bits. Change type of sharedInfoIndex in _InstInfo!"
fields = "0x%x, %d" % (sharedInfoIndex, mnems[0])
# "Structure-Name" = II_Bytes-Code {Fields + Optional-Fields}.
return ("\t/*II%s*/ {%s%s}" % (ii.tag, fields, optFields), (ii.flags & InstFlag.EXTENDED) != 0)
def FilterTable(table):
    """Tell whether table should be emitted; every table goes to the output, so this is always True."""
    return True
def GeneratePseudoMnemonicOffsets():
"""
Generate the static offset tables for psuedo compare instructions both for SSE and AVX.
The table is built in such a way that each cell holds the offset from the first pseudo mnemonic
to the indexed one.
"""
# Lengths of pesudo mnemonics (SSE=CMPxxxYY + null + lengthByte)
lengths = map(lambda x: 3 + len(x) + 2 + 2, SSECmpTypes)
s = "uint16_t CmpMnemonicOffsets[8] = {\n" + ", ".join</