diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..2f4f87f --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,12 @@ +{ + "permissions": { + "allow": [ + "Bash(xargs:*)", + "Bash(tree:*)", + "Bash(go build:*)", + "Bash(go run:*)" + ], + "deny": [], + "ask": [] + } +} diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..e6616eb --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,22 @@ +{ + // 使用 IntelliSense 了解相关属性。 + // 悬停以查看现有属性的描述。 + // 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Launch file", + "type": "go", + "request": "launch", + "mode": "debug", + "program": "${file}" + }, + { + "name": "Launch Package", + "type": "go", + "request": "launch", + "mode": "auto", + "program": "${fileDirname}" + } + ] +} \ No newline at end of file diff --git a/README.md b/README.md index c71a9c6..420e59d 100644 --- a/README.md +++ b/README.md @@ -4,18 +4,213 @@ 未经修改的源代码在 [main 分支](https://github.com/CJYKK/chatlog_backup/tree/main),本人不对代码中的任何内容负责。 -# 提示 +## 项目概述 + +这是一个微信聊天记录解密工具,支持Windows和macOS平台。工具通过注入DLL或内存扫描的方式获取微信数据库密钥,然后解密微信聊天数据库文件。 + +## 主要功能 + +- **密钥获取**:支持DLL注入和内存扫描两种方式获取微信数据库密钥 +- **数据库解密**:解密微信加密的SQLite数据库文件 +- **图片解密**:解密微信加密的图片文件(需要图片密钥) +- **自动监控**:监控微信数据目录,自动解密新增数据 +- **HTTP服务**:提供本地HTTP服务,支持MCP协议 +- **多平台支持**:支持Windows和macOS +- **多版本支持**:支持微信3.x和4.x版本 + +## 快速开始 + +### 1. 获取密钥DLL 获取key请前往 https://github.com/ycccccccy/wx_key -更新日志: +将 `wx_key.dll` 文件放置在 `lib/windows_x64/` 目录下。 -1.2025年12月13日同步了wx_key项目相关图片解密处理逻辑 +### 2. 编译项目 +```bash +go build -o chatlog_alpha.exe main.go +``` -2.http://127.0.0.1:5030/默认页面增加了清除图片缓存功能,方便测试 +### 3. 运行程序 +```bash +./chatlog_alpha.exe +``` +### 4. 完整使用逻辑 +#### 1.启动chatlog +#### 2.启动微信(不要点击登录) +#### 3.等待chatlog获取到pid信息时,再点击登录 +#### 4.30s后返回key,注意获取图片key还未适配,欢迎大佬提交 -3.对ffmpeg未安装导致的dat转换失败进行显示提示 +## 使用说明 -#重要提示 -1.对dat转换一定要安装ffmpeg,并且在系统变量设置bin目录的path,否则会显式报错 +### 界面操作 +程序启动后会出现TUI界面,主要功能包括: -2.图片解密aes key请转为hex,如:16位aes key -》 34000000386000006538323730000000 +1. **获取密钥**:从微信进程获取数据库密钥和图片密钥 +2. **解密数据**:解密微信数据文件 +3. **启动HTTP服务**:启动本地HTTP & MCP服务器 +4. **开启自动解密**:监控数据目录,自动解密新增数据 +5. **设置**:配置应用程序选项 +6. **切换账号**:切换当前操作的账号 +7. **退出**:退出程序 + +### 密钥获取流程 + +1. **DLL方式(推荐)**: + - 程序会尝试加载 `wx_key.dll` + - 初始化DLL Hook到微信进程 + - 轮询获取密钥(需要用户登录微信并查看聊天记录触发数据库读取) + - 获取成功后自动清理Hook + +2. **原生方式(备用)**: + - 如果DLL不可用,使用内存扫描方式 + - 在微信进程内存中搜索密钥模式 + - 需要微信已登录状态 + +### 临时账户管理 + +程序支持临时账户管理,当微信未登录或重启时: + +- **临时账户名称**:格式为 `未登录微信_PID`,会跟随PID变化自动更新 +- **状态监控**:实时监控微信进程状态变化 +- **自动切换**:微信登录后自动切换为真实账户名称 +- **数据清理**:微信退出后自动清理相关数据 + +## 配置说明 + +### 重要配置项 + +1. **HTTP服务地址**:默认 `127.0.0.1:5030` +2. **工作目录**:解密后文件的存储目录 +3. **数据目录**:微信数据文件所在目录 +4. **数据密钥**:微信数据库解密密钥 +5. **图片密钥**:微信图片解密密钥 + +### 配置文件 + +配置文件位于用户目录下的 `.chatlog/config.json`,包含: +- 历史账号信息 +- 最后使用的账号 +- HTTP服务配置 +- 工作目录设置 + +## 重要提示 + +### 1. ffmpeg依赖 +对dat转换一定要安装ffmpeg,并且在系统变量设置bin目录的path,否则会显式报错。 + +### 2. 图片密钥格式 +图片解密aes key请转为hex,如:16位aes key → `34000000386000006538323730000000` + +### 3. 使用注意事项 +- 获取密钥时微信会短暂卡顿,这是正常现象 +- 请确保微信已登录并打开聊天窗口查看历史消息 +- 对于微信V4,图片密钥可能不是必需的 +- 程序需要管理员/root权限来访问进程内存 + +## 更新日志 + +### 2025年12月14日 +- 优化临时账户名称管理,跟随PID变化自动更新 +- 改进微信进程状态监控逻辑 +- 修复微信重启后PID不更新的问题 +- 增强错误处理和用户提示 + +### 2025年12月13日 +- 同步了wx_key项目相关图片解密处理逻辑 +- http://127.0.0.1:5030/默认页面增加了清除图片缓存功能,方便测试 +- 对ffmpeg未安装导致的dat转换失败进行显示提示 + +### 早期版本 +- 实现DLL密钥获取方式 +- 添加DLL日志记录功能 +- 支持微信未登录状态获取PID +- 实现自动监控和解密功能 +- 添加HTTP服务支持 + +## 文件结构 + +``` +chatlog_alpha/ +├── main.go # 程序入口 +├── internal/ +│ ├── chatlog/ # 聊天记录处理核心 +│ │ ├── app.go # TUI应用程序 +│ │ ├── ctx/ # 上下文管理 +│ │ └── manager.go # 管理器 +│ ├── wechat/ # 微信相关功能 +│ │ ├── wechat.go # 微信账号管理 +│ │ ├── key/ # 密钥提取器 +│ │ ├── decrypt/ # 解密器 +│ │ └── process/ # 进程检测 +│ └── ui/ # 用户界面组件 +├── pkg/ +│ ├── util/ # 工具函数 +│ └── config/ # 配置管理 +├── lib/ +│ └── windows_x64/ # Windows DLL文件 +└── dll调用指南.md # DLL使用指南 +``` + +## 技术实现 + +### 密钥获取机制 + +1. **DLL注入方式**: + - 使用 `wx_key.dll` 注入Shellcode到微信进程 + - 通过共享内存传递密钥数据 + - 支持微信4.x版本 + +2. **内存扫描方式**: + - 读取微信进程内存 + - 搜索特定的密钥模式 + - 支持微信3.x和4.x版本 + +### 进程监控 + +- 实时检测微信进程状态 +- 支持多实例微信 +- 自动处理进程重启 +- 状态变化时更新UI + +### 日志系统 + +- DLL操作日志保存到程序运行目录 +- 详细的错误信息和调试信息 +- 支持日志级别控制 + +## 常见问题 + +### Q: 获取密钥超时怎么办? +A: 请确保: +1. 微信已登录(不能停留在登录界面) +2. 打开任意聊天窗口 +3. 向上滚动查看历史消息(触发数据库读取) +4. 或者发送/接收一条新消息 + +### Q: DLL加载失败怎么办? +A: 请检查: +1. `wx_key.dll` 文件是否在 `lib/windows_x64/` 目录 +2. 系统架构是否匹配(需要64位系统) +3. 杀毒软件是否拦截了DLL加载 + +### Q: 图片解密失败怎么办? +A: 请检查: +1. 图片密钥是否正确(需要16字节的HEX字符串) +2. 微信版本是否支持图片解密 +3. 图片文件是否完整 + +### Q: 程序无法检测到微信进程? +A: 请检查: +1. 微信是否正在运行 +2. 程序是否有足够的权限 +3. 防病毒软件是否阻止了进程访问 + +## 许可证 + +本项目基于原chatlog项目,具体许可证信息请参考原项目。 + +## 免责声明 + +本项目仅供学习和研究使用,请勿用于非法用途。使用本工具产生的任何后果由使用者自行承担。 + +**重要**:请遵守当地法律法规,尊重他人隐私,合法使用本工具。 \ No newline at end of file diff --git a/dll调用指南.md b/dll调用指南.md new file mode 100644 index 0000000..89aa1fd --- /dev/null +++ b/dll调用指南.md @@ -0,0 +1,164 @@ +# wx_key.dll 集成开发指南 + +本文档旨在帮助开发者快速理解并集成 `wx_key.dll`。该组件封装了微信逆向工程的核心逻辑,让你无需关心底层的内存扫描与 Hook 实现,即可在 C#、Flutter 或 C++ 等上层应用中获取微信数据库密钥与图片密钥。 + +--- + +## 1. 核心原理 + +简单来说,`wx_key.dll` 充当了**宿主程序**与**微信进程**之间的桥梁。 + +1. **注入与扫描**:当你的程序加载此 DLL 并调用初始化后,它会通过 `RemoteScanner` 扫描微信进程内存,利用特征码定位密钥获取函数的入口(支持 4.x 多个版本)。PID 由外部传入,DLL 不再枚举进程。 +2. **拦截与共享**:定位成功后,DLL 会写入一段 Shellcode 进行 Hook。当微信尝试读取数据库时,Shellcode 会拦截 32 字节的密钥,将其拷贝到**共享内存缓冲**中,并递增一个 `sequenceNumber`。 +3. **轮询机制**:DLL 内部使用事件唤醒 + 轮询监听线程。你的程序只需定时检查共享内存是否有新的 `sequenceNumber`,即可拿到密钥。 + +> **⚠️ 环境硬性要求**: +> * **架构**:仅支持 x64 系统与 64 位微信客户端(Shellcode 为 x64 汇编)。 +> * **权限**:调用进程若失败可能需要 **管理员身份(Administrator)** 运行。 + +--- + +## 2. API 接口说明 + +所有导出函数均为 C 风格接口,声明文件可见 `wx_key/include/hook_controller.h`。 + +| 接口函数 | 参数说明 | 详细描述 | +| :--- | :--- | :--- | +| **`InitializeHook`** | `DWORD targetPid` (微信进程ID) | **启动入口**。执行远程扫描、分配共享内存并注入 Shellcode。成功返回 `true`,失败请调 `GetLastErrorMsg`。PID 需要调用方自行获取。 | +| **`PollKeyData`** | `char* keyBuf`
`int size` (建议 >= 65) | **获取密钥**。非阻塞检查。如果捕获到密钥,会将其格式化为 64 位 HEX 字符串写入缓冲区并返回 `true`。一次读取后自动清空。 | +| **`GetStatusMessage`** | `char* msgBuf`
`int size`
`int* outLevel` | **获取日志**。读取 DLL 内部运行日志(如“扫描成功”、“特征码未找到”等)。`outLevel` 对应:`0=Info, 1=Success, 2=Error`。 | +| **`CleanupHook`** | 无 | **清理资源**。卸载远程 Hook、释放共享内存并关闭句柄。**程序退出前务必调用**。 | +| **`GetLastErrorMsg`** | 无 | **错误诊断**。返回最近一次操作失败的具体原因。 | + +--- + +## 3. 标准调用流程 + +无论使用哪种语言,集成步骤都应该要遵循以下流程: + +### 第一步:定位进程 +自行查找 `Weixin.exe` 的 PID(进程 ID),DLL 不再替你枚举进程。 + +### 第二步:加载 DLL +将 `wx_key.dll` 加载到当前进程空间。 +* **Flutter**: `DynamicLibrary.open('assets/dll/wx_key.dll')` +* **C#**: `NativeLibrary.Load("wx_key.dll")` + +### 第三步:初始化 (Initialize) +调用 `InitializeHook(pid)`。 +* 如果返回 `false`,通常是因为**权限不足**或**微信版本不支持**(特征码失效),请立即打印 `GetLastErrorMsg()` 排查。 + +### 第四步:轮询 (Polling) +启动一个后台线程或定时器(建议间隔 100ms),循环调用 `PollKeyData` 和 `GetStatusMessage`。 +* **注意**:不要在 UI 线程直接做死循环,也不要设置过长的等待时间。 +* `PollKeyData` 每次返回后会清空共享缓冲;若需要去重,可对 `sequenceNumber` 进行本地缓存比对。 + +### 第五步:清理 (Cleanup) +在程序关闭或不再需要功能时,**必须**调用 `CleanupHook()`。 +* 如果不清理,残留在微信进程内的 Shellcode 可能会在微信后续运行时导致崩溃。 + +--- + +## 4. 代码集成示例 (C#) + +以下代码展示了如何通过 P/Invoke 封装一个健壮的调用类: + +```csharp +using System.Runtime.InteropServices; +using System.Text; + +public class WeChatKeyDumper +{ + private const string DllName = "wx_key.dll"; + + [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)] + private static extern bool InitializeHook(uint targetPid); + + [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)] + private static extern bool PollKeyData(StringBuilder keyBuffer, int bufferSize); + + [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)] + private static extern bool GetStatusMessage(StringBuilder statusBuffer, int bufferSize, out int level); + + [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)] + private static extern bool CleanupHook(); + + [DllImport(DllName, CallingConvention = CallingConvention.Cdecl)] + private static extern IntPtr GetLastErrorMsg(); + + // 启动监听任务 + public void Start(uint pid) + { + if (!InitializeHook(pid)) + { + string error = Marshal.PtrToStringUTF8(GetLastErrorMsg()); + throw new Exception($"初始化失败: {error} (请尝试以管理员身份运行)"); + } + + Task.Run(async () => + { + var keyBuf = new StringBuilder(128); + var logBuf = new StringBuilder(512); + int level; + + try + { + while (true) + { + // 1. 尝试获取密钥 + if (PollKeyData(keyBuf, keyBuf.Capacity)) + { + Console.WriteLine($"[KEY FOUND] {keyBuf}"); + // 拿到密钥后,可根据需求决定是否继续监听 + } + + // 2. 获取内部日志 + while (GetStatusMessage(logBuf, logBuf.Capacity, out level)) + { + Console.WriteLine($"[DLL Log - L{level}] {logBuf}"); + } + + await Task.Delay(100); // 避免 CPU 占用过高 + } + } + finally + { + CleanupHook(); // 确保线程退出时清理环境 + } + }); + } +} +``` + +--- + +## 5. 开发者避坑指南 + +在实际集成中,你可能会遇到这些问题: + +1. **缓冲区溢出**: + `PollKeyData` 返回的是 Hex 字符串,加上结束符至少需要 65 字节。C# 的 `StringBuilder` 或 C++ 的 `char[]` 分配小了会导致内存踩踏,建议给 **128 字节** 以防万一。 + +2. **单例原则**: + 同一个微信进程只能被 Hook 一次。如果需要重启扫描,请先调用 `CleanupHook` 彻底释放资源,再重新 `InitializeHook`。 + +3. **版本兼容性**: + 如果遇到微信更新导致无法获取密钥,通常是特征码偏移变了。此时无需修改上层代码,只需更新 DLL 源码中的 `RemoteScanner` 特征码库并重新编译 DLL 即可。 + +4. **日志前缀**: + DLL 返回的 `GetStatusMessage` 字符串不再附加 `[INFO]/[SUCCESS]` 等标签,UI 层可依据 `outLevel` 自行添加前缀。 + +5. **共享内存结构(供参考)**: + + ```c + typedef struct { + DWORD dataSize; // 固定 32 + BYTE keyBuffer[32]; // 32 字节密钥 + DWORD sequenceNumber; // 每次写入 +1,用于去重 + } SharedKeyData; + ``` + + Shellcode 会在 `dataSize == 32` 时写入并递增 `sequenceNumber`。上层可以持有上次读到的 `sequenceNumber` 来避免重复处理同一条数据。 + +6. **多线程安全**: + 虽然导出函数内部是线程安全的,但为了逻辑清晰,建议仅在一个专用的 Monitor 线程中进行轮询操作。 \ No newline at end of file diff --git a/internal/chatlog/app.go b/internal/chatlog/app.go index d0ee2ce..c011c19 100644 --- a/internal/chatlog/app.go +++ b/internal/chatlog/app.go @@ -6,6 +6,7 @@ import ( "runtime" "time" + "github.com/rs/zerolog/log" "github.com/sjzar/chatlog/internal/chatlog/ctx" "github.com/sjzar/chatlog/internal/ui/footer" "github.com/sjzar/chatlog/internal/ui/form" @@ -141,6 +142,24 @@ func (a *App) refresh() { case <-a.stopRefresh: return case <-tick.C: + // 如果当前账号为空,尝试查找微信进程 + if a.ctx.Current == nil { + // 获取微信实例 + instances := a.m.wechat.GetWeChatInstances() + if len(instances) > 0 { + // 找到微信进程,设置第一个为当前账号 + a.ctx.SwitchCurrent(instances[0]) + log.Info().Msgf("检测到微信进程,PID: %d,已设置为当前账号", instances[0].PID) + } + } + + // 刷新当前账号状态(如果存在) + if a.ctx.Current != nil { + a.ctx.Current.RefreshStatus() + // 更新上下文信息 + a.ctx.Refresh() + } + if a.ctx.AutoDecrypt || a.ctx.HTTPEnabled { a.m.RefreshSession() } diff --git a/internal/chatlog/ctx/context.go b/internal/chatlog/ctx/context.go index 05420ac..e83f4dd 100644 --- a/internal/chatlog/ctx/context.go +++ b/internal/chatlog/ctx/context.go @@ -125,6 +125,7 @@ func (c *Context) SwitchCurrent(info *wechat.Account) { } func (c *Context) Refresh() { if c.Current != nil { + oldAccount := c.Account c.Account = c.Current.Name c.Platform = c.Current.Platform c.Version = c.Current.Version @@ -132,15 +133,27 @@ func (c *Context) Refresh() { c.PID = int(c.Current.PID) c.ExePath = c.Current.ExePath c.Status = c.Current.Status - if c.Current.Key != "" && c.Current.Key != c.DataKey { + // 更新密钥数据 - 如果Current中的密钥为空,也更新Context + if c.Current.Key != c.DataKey { c.DataKey = c.Current.Key } - if c.Current.ImgKey != "" && c.Current.ImgKey != c.ImgKey { + if c.Current.ImgKey != c.ImgKey { c.ImgKey = c.Current.ImgKey } - if c.Current.DataDir != "" && c.Current.DataDir != c.DataDir { + if c.Current.DataDir != c.DataDir { c.DataDir = c.Current.DataDir } + + // 如果账号名称发生变化(例如从临时名称变为真实名称),更新历史记录 + if oldAccount != "" && oldAccount != c.Account { + // 将旧的历史记录迁移到新的账号名称下 + if oldHistory, ok := c.History[oldAccount]; ok { + c.History[c.Account] = oldHistory + delete(c.History, oldAccount) + // 更新配置 + c.UpdateConfig() + } + } } if c.DataUsage == "" && c.DataDir != "" { go func() { diff --git a/internal/wechat/key/extractor.go b/internal/wechat/key/extractor.go index 1b50ede..d4261dd 100644 --- a/internal/wechat/key/extractor.go +++ b/internal/wechat/key/extractor.go @@ -2,6 +2,7 @@ package key import ( "context" + "fmt" "github.com/sjzar/chatlog/internal/errors" "github.com/sjzar/chatlog/internal/wechat/decrypt" @@ -23,11 +24,22 @@ type Extractor interface { } // NewExtractor 创建适合当前平台的密钥提取器 +// 对于Windows平台,优先使用DLL方式(如果DLL存在) func NewExtractor(platform string, version int) (Extractor, error) { switch { case platform == "windows" && version == 3: + // 尝试使用DLL方式 + if extractor, err := NewDLLExtractor(platform, version); err == nil { + return extractor, nil + } + // 如果DLL方式失败,回退到原来的方式 return windows.NewV3Extractor(), nil case platform == "windows" && version == 4: + // 尝试使用DLL方式 + if extractor, err := NewDLLExtractor(platform, version); err == nil { + return extractor, nil + } + // 如果DLL方式失败,回退到原来的方式 return windows.NewV4Extractor(), nil case platform == "darwin" && version == 3: return darwin.NewV3Extractor(), nil @@ -37,3 +49,23 @@ func NewExtractor(platform string, version int) (Extractor, error) { return nil, errors.PlatformUnsupported(platform, version) } } + +// NewDLLExtractor 创建使用DLL的密钥提取器(仅支持Windows) +func NewDLLExtractor(platform string, version int) (Extractor, error) { + if platform != "windows" { + return nil, errors.PlatformUnsupported(platform, version) + } + + // 检查DLL是否可用 + if !windows.IsDLLAvailable() { + return nil, fmt.Errorf("wx_key.dll 不可用") + } + + switch version { + case 3, 4: + // V3和V4都使用相同的DLL提取器 + return windows.NewDLLV4Extractor(), nil + default: + return nil, errors.PlatformUnsupported(platform, version) + } +} diff --git a/internal/wechat/key/windows/dll_extractor.go b/internal/wechat/key/windows/dll_extractor.go new file mode 100644 index 0000000..90b90ca --- /dev/null +++ b/internal/wechat/key/windows/dll_extractor.go @@ -0,0 +1,506 @@ +package windows + +import ( + "context" + "encoding/hex" + "fmt" + "sync" + "time" + "unsafe" + + "github.com/rs/zerolog/log" + "golang.org/x/sys/windows" + + "github.com/sjzar/chatlog/internal/wechat/decrypt" + "github.com/sjzar/chatlog/internal/wechat/model" + "github.com/sjzar/chatlog/pkg/util" +) + +// DLL函数定义 +var ( + modwxkey *windows.LazyDLL + + procInitializeHook *windows.LazyProc + procPollKeyData *windows.LazyProc + procGetStatusMessage *windows.LazyProc + procCleanupHook *windows.LazyProc + procGetLastErrorMsg *windows.LazyProc + + dllAvailable = false // 标记DLL是否可用 +) + +// DLLExtractor 使用wx_key.dll的密钥提取器 +type DLLExtractor struct { + validator *decrypt.Validator + mu sync.Mutex + initialized bool + pid uint32 + lastKey string // 记录上次获取的密钥,用于简单去重 + logger *util.DLLLogger // DLL日志记录器 +} + +// init 初始化DLL函数 +func init() { + // 加载DLL - 使用相对路径 + dllPath := "lib/windows_x64/wx_key.dll" + modwxkey = windows.NewLazyDLL(dllPath) + + // 尝试加载DLL,检查是否可用 + err := modwxkey.Load() + if err != nil { + log.Debug().Err(err).Msg("wx_key.dll 加载失败,将使用原生方式") + dllAvailable = false + return + } + + // 获取函数指针 + procInitializeHook = modwxkey.NewProc("InitializeHook") + procPollKeyData = modwxkey.NewProc("PollKeyData") + procGetStatusMessage = modwxkey.NewProc("GetStatusMessage") + procCleanupHook = modwxkey.NewProc("CleanupHook") + procGetLastErrorMsg = modwxkey.NewProc("GetLastErrorMsg") + + // 检查所有函数是否都成功获取 + if procInitializeHook != nil && procPollKeyData != nil && procGetStatusMessage != nil && + procCleanupHook != nil && procGetLastErrorMsg != nil { + dllAvailable = true + log.Debug().Msg("wx_key.dll 加载成功,将使用DLL方式获取密钥") + } else { + dllAvailable = false + log.Debug().Msg("wx_key.dll 函数获取失败,将使用原生方式") + } +} + +// IsDLLAvailable 检查DLL是否可用 +func IsDLLAvailable() bool { + return dllAvailable +} + +// NewDLLV4Extractor 创建使用DLL的V4密钥提取器 +func NewDLLV4Extractor() *DLLExtractor { + return &DLLExtractor{ + logger: util.GetDLLLogger(), + } +} + +// Extract 从进程中提取密钥(使用DLL方式) +func (e *DLLExtractor) Extract(ctx context.Context, proc *model.Process) (string, string, error) { + // 即使状态是offline(未登录),也允许尝试初始化DLL + // 因为DLL方式可以在用户登录后拦截密钥 + if proc.Status == model.StatusOffline { + log.Info().Msg("微信进程存在但未登录,将尝试初始化DLL,请登录微信后操作") + // 不返回错误,继续执行 + } + + e.mu.Lock() + defer e.mu.Unlock() + + // 清理之前的初始化(如果存在) + if e.initialized { + e.cleanup() + } + + // 初始化DLL + if err := e.initialize(proc.PID); err != nil { + return "", "", err + } + + // 确保无论成功失败都清理 + // 注意:这里使用defer确保cleanup在函数返回前执行 + defer func() { + if e.initialized { + e.cleanup() + } + }() + + // 轮询获取密钥 + return e.pollKeys(ctx, proc.Version) +} + +// initialize 初始化DLL Hook +func (e *DLLExtractor) initialize(pid uint32) error { + // 调用InitializeHook + ret, _, err := procInitializeHook.Call(uintptr(pid)) + if ret == 0 { + // 获取错误信息 + errorMsg := e.getLastError() + + // 记录错误日志 + if e.logger != nil { + e.logger.LogInitialization(pid, false, errorMsg) + } + + if errorMsg != "" { + return fmt.Errorf("初始化DLL失败: %s", errorMsg) + } + if err != nil { + return fmt.Errorf("初始化DLL失败: %v", err) + } + return fmt.Errorf("初始化DLL失败") + } + + e.initialized = true + e.pid = pid + + // 记录成功日志 + if e.logger != nil { + e.logger.LogInitialization(pid, true, "") + e.logger.LogInfo(fmt.Sprintf("DLL初始化成功,PID: %d", pid)) + } + + log.Debug().Msgf("DLL初始化成功,PID: %d", pid) + return nil +} + +// pollKeys 轮询获取密钥 +func (e *DLLExtractor) pollKeys(ctx context.Context, version int) (string, string, error) { + if !e.initialized { + return "", "", fmt.Errorf("DLL未初始化") + } + + // 设置超时时间 - 改为30秒 + timeout := time.After(30 * time.Second) + ticker := time.NewTicker(100 * time.Millisecond) + defer ticker.Stop() + + var dataKey, imgKey string + loginPromptShown := false // 标记是否已显示登录提示 + pollCount := 0 // 轮询计数器 + + for { + select { + case <-ctx.Done(): + return "", "", ctx.Err() + case <-timeout: + // 检查是否获取到了数据密钥 + if dataKey != "" { + // 获取到了数据密钥,但没有获取到图片密钥 + warningMsg := "30秒轮询结束,已获取数据库密钥,但未获取到图片密钥\n" + + "注意:对于微信V4,图片密钥可能不是必需的,或者需要其他方式获取\n" + + "数据库密钥: " + dataKey + log.Warn().Msg("30秒轮询结束,已获取数据库密钥,但未获取到图片密钥") + log.Warn().Msg("注意:对于微信V4,图片密钥可能不是必需的,或者需要其他方式获取") + log.Warn().Msg("数据库密钥: " + dataKey) + + // 记录到日志文件 + if e.logger != nil { + e.logger.LogWarning(warningMsg) + } + + // 返回数据库密钥,图片密钥为空 + return dataKey, "", nil + } else { + // 没有获取到任何密钥 + errorMsg := "获取密钥超时(30秒)!可能的原因:\n" + + "1. 微信未登录 - 请登录微信\n" + + "2. 未触发数据库读取 - 请打开聊天窗口并查看历史消息\n" + + "3. DLL Hook失败 - 检查日志文件查看详细错误\n" + + "4. 微信版本不受支持 - 当前支持: 4.0.x 及以上 4.x 版本" + log.Error().Msg("获取密钥超时(30秒)!可能的原因:") + log.Error().Msg("1. 微信未登录 - 请登录微信") + log.Error().Msg("2. 未触发数据库读取 - 请打开聊天窗口并查看历史消息") + log.Error().Msg("3. DLL Hook失败 - 检查日志文件查看详细错误") + log.Error().Msg("4. 微信版本不受支持 - 当前支持: 4.0.x 及以上 4.x 版本") + + // 记录到日志文件 + if e.logger != nil { + e.logger.LogError(errorMsg) + } + return "", "", fmt.Errorf("获取密钥超时(30秒,请查看上方错误提示)") + } + case <-ticker.C: + pollCount++ + + // 尝试获取密钥 + key, err := e.pollKeyData() + if err != nil { + errorMsg := fmt.Sprintf("轮询密钥失败: %v", err) + log.Err(err).Msg("轮询密钥失败") + // 记录到日志文件 + if e.logger != nil { + e.logger.LogError(errorMsg) + } + continue + } + + if key != "" && key != e.lastKey { + // 简单去重:避免重复处理相同的密钥 + e.lastKey = key + + // 验证密钥类型 + keyBytes, err := hex.DecodeString(key) + if err != nil { + errorMsg := fmt.Sprintf("解码密钥失败: %v", err) + log.Err(err).Msg("解码密钥失败") + // 记录到日志文件 + if e.logger != nil { + e.logger.LogError(errorMsg) + } + continue + } + + // 检查是否是数据库密钥 + if e.validator != nil && e.validator.Validate(keyBytes) { + if dataKey == "" { + dataKey = key + msg := "通过DLL找到数据库密钥: " + key + log.Info().Msg(msg) + // 记录到日志文件 + if e.logger != nil { + e.logger.LogPolling(true, key, "数据库") + e.logger.LogInfo(msg) + } + } + } else if e.validator == nil { + // 验证器为nil时,根据密钥长度判断 + // 数据库密钥通常是32字节(64字符HEX字符串) + // 图片密钥通常是16字节(32字符HEX字符串) + if len(key) == 64 && dataKey == "" { + dataKey = key + msg := "通过DLL找到数据库密钥(无验证): " + key + log.Info().Msg(msg) + // 记录到日志文件 + if e.logger != nil { + e.logger.LogPolling(true, key, "数据库") + e.logger.LogInfo(msg) + } + } else if len(key) == 32 && imgKey == "" { + imgKey = key + msg := "通过DLL找到图片密钥(无验证): " + imgKey + log.Info().Msg(msg) + // 记录到日志文件 + if e.logger != nil { + e.logger.LogPolling(true, imgKey, "图片") + e.logger.LogInfo(msg) + } + } + } + + // 检查是否是图片密钥(取前16字节) + if e.validator != nil && e.validator.ValidateImgKey(keyBytes) { + if imgKey == "" { + imgKey = key[:32] // 16字节的HEX字符串是32个字符 + msg := "通过DLL找到图片密钥: " + imgKey + log.Info().Msg(msg) + // 记录到日志文件 + if e.logger != nil { + e.logger.LogPolling(true, imgKey, "图片") + e.logger.LogInfo(msg) + } + } + } + + // 如果两个密钥都找到了,返回 + if dataKey != "" && imgKey != "" { + return dataKey, imgKey, nil + } + + // 对于微信V3,只需要数据库密钥 + if version == 3 && dataKey != "" { + return dataKey, "", nil + } + + } else { + // 没有获取到密钥,每5秒显示一次操作提示 + // 每100ms轮询一次,50次轮询 = 5秒 + if !loginPromptShown && pollCount%50 == 0 { + msg := "等待获取密钥... 请按以下步骤操作:\n" + + "1. 确保微信已登录(不能停留在登录界面)\n" + + "2. 打开任意聊天窗口\n" + + "3. 向上滚动查看历史消息(触发数据库读取)\n" + + "4. 或者发送/接收一条新消息" + log.Info().Msg("等待获取密钥... 请按以下步骤操作:") + log.Info().Msg("1. 确保微信已登录(不能停留在登录界面)") + log.Info().Msg("2. 打开任意聊天窗口") + log.Info().Msg("3. 向上滚动查看历史消息(触发数据库读取)") + log.Info().Msg("4. 或者发送/接收一条新消息") + + // 记录到日志文件 + if e.logger != nil { + e.logger.LogInfo(msg) + } + loginPromptShown = true + } + } + + // 获取状态信息 + e.getStatusMessages() + + // 每10秒显示一次调试信息 + if pollCount%100 == 0 { + debugMsg := fmt.Sprintf("轮询中... 已轮询 %d 次,已等待 %.1f 秒", pollCount, float64(pollCount)*0.1) + log.Debug().Msg(debugMsg) + + // 记录到日志文件 + if e.logger != nil { + e.logger.LogDebug(debugMsg) + } + } + } + } +} + +// pollKeyData 调用DLL的PollKeyData函数 +func (e *DLLExtractor) pollKeyData() (string, error) { + if !e.initialized { + return "", fmt.Errorf("DLL未初始化") + } + + // 分配缓冲区(至少65字节,建议128字节) + buf := make([]byte, 128) + ret, _, _ := procPollKeyData.Call( + uintptr(unsafe.Pointer(&buf[0])), + uintptr(len(buf)), + ) + + if ret == 0 { + // 没有新密钥 + return "", nil + } + + // 找到以null结尾的字符串 + for i := 0; i < len(buf); i++ { + if buf[i] == 0 { + key := string(buf[:i]) + if key != "" { + debugMsg := fmt.Sprintf("从DLL获取到密钥字符串: %s (长度: %d)", key, len(key)) + log.Debug().Msg(debugMsg) + // 记录到日志文件 + if e.logger != nil { + e.logger.LogDebug(debugMsg) + } + } + return key, nil + } + } + + key := string(buf) + if key != "" { + debugMsg := fmt.Sprintf("从DLL获取到密钥字符串(无null终止): %s (长度: %d)", key, len(key)) + log.Debug().Msg(debugMsg) + // 记录到日志文件 + if e.logger != nil { + e.logger.LogDebug(debugMsg) + } + } + return key, nil +} + +// getStatusMessages 获取DLL状态信息 +func (e *DLLExtractor) getStatusMessages() { + if !e.initialized { + return + } + + buf := make([]byte, 512) + var level int32 + + for { + ret, _, _ := procGetStatusMessage.Call( + uintptr(unsafe.Pointer(&buf[0])), + uintptr(len(buf)), + uintptr(unsafe.Pointer(&level)), + ) + + if ret == 0 { + break + } + + // 找到以null结尾的字符串 + var msg string + for i := 0; i < len(buf); i++ { + if buf[i] == 0 { + msg = string(buf[:i]) + break + } + } + + if msg != "" { + logLevel := "INFO" + if level == 1 { + logLevel = "SUCCESS" + } else if level == 2 { + logLevel = "ERROR" + } + log.Debug().Msgf("[DLL %s] %s", logLevel, msg) + + // 记录到日志文件 + if e.logger != nil { + e.logger.LogStatus(int(level), msg) + } + } + } +} + +// getLastError 获取DLL最后错误信息 +func (e *DLLExtractor) getLastError() string { + ret, _, _ := procGetLastErrorMsg.Call() + if ret == 0 { + return "" + } + + // 将指针转换为Go字符串 + errorMsgPtr := (*byte)(unsafe.Pointer(ret)) + if errorMsgPtr == nil { + return "" + } + + // 计算字符串长度 + length := 0 + for { + if *(*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(errorMsgPtr)) + uintptr(length))) == 0 { + break + } + length++ + if length > 1024 { + break + } + } + + if length == 0 { + return "" + } + + buf := make([]byte, length) + for i := 0; i < length; i++ { + buf[i] = *(*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(errorMsgPtr)) + uintptr(i))) + } + + errorMsg := string(buf) + + // 记录错误信息到日志文件 + if e.logger != nil && errorMsg != "" { + e.logger.LogError(errorMsg) + } + + return errorMsg +} + +// cleanup 清理DLL资源 +func (e *DLLExtractor) cleanup() { + if !e.initialized { + return + } + + procCleanupHook.Call() + e.initialized = false + e.lastKey = "" // 清理上次密钥记录 + + // 记录清理日志 + if e.logger != nil { + e.logger.LogCleanup() + } + + log.Debug().Msg("DLL资源已清理") +} + +// SearchKey 在内存中搜索密钥(DLL方式不支持此功能) +func (e *DLLExtractor) SearchKey(ctx context.Context, memory []byte) (string, bool) { + // DLL方式不支持直接内存搜索 + return "", false +} + +// SetValidate 设置验证器 +func (e *DLLExtractor) SetValidate(validator *decrypt.Validator) { + e.validator = validator +} \ No newline at end of file diff --git a/internal/wechat/manager.go b/internal/wechat/manager.go index b317f35..7d353a1 100644 --- a/internal/wechat/manager.go +++ b/internal/wechat/manager.go @@ -32,6 +32,11 @@ func GetAccounts() []*Account { return DefaultManager.GetAccounts() } +// GetAllProcesses 获取所有微信进程 +func GetAllProcesses() ([]*model.Process, error) { + return DefaultManager.GetAllProcesses() +} + // Manager 微信管理器 type Manager struct { detector process.Detector @@ -97,6 +102,22 @@ func (m *Manager) GetAccounts() []*Account { return m.accounts } +// GetAllProcesses 获取所有微信进程 +func (m *Manager) GetAllProcesses() ([]*model.Process, error) { + // 重新加载进程信息 + if err := m.Load(); err != nil { + return nil, err + } + + // 从processMap中提取所有进程 + processes := make([]*model.Process, 0, len(m.processMap)) + for _, p := range m.processMap { + processes = append(processes, p) + } + + return processes, nil +} + // DecryptDatabase 便捷方法:通过账号名解密数据库 func (m *Manager) DecryptDatabase(ctx context.Context, accountName, dbPath, outputPath string) error { // 获取账号 diff --git a/internal/wechat/process/windows/detector_windows.go b/internal/wechat/process/windows/detector_windows.go index f5cc1a3..9c11187 100644 --- a/internal/wechat/process/windows/detector_windows.go +++ b/internal/wechat/process/windows/detector_windows.go @@ -1,6 +1,7 @@ package windows import ( + "fmt" "path/filepath" "strings" @@ -15,7 +16,10 @@ func initializeProcessInfo(p *process.Process, info *model.Process) error { files, err := p.OpenFiles() if err != nil { log.Err(err).Msgf("获取进程 %d 的打开文件失败", p.Pid) - return err + // 即使获取打开文件失败,也返回进程信息(状态保持为offline) + // 为未登录的进程生成临时账号名称 + info.AccountName = fmt.Sprintf("未登录微信_%d", p.Pid) + return nil } dbPath := V3DBFile @@ -44,5 +48,9 @@ func initializeProcessInfo(p *process.Process, info *model.Process) error { } } + // 如果没有找到数据库文件,进程仍然存在,只是未登录 + // 状态保持为 model.StatusOffline(调用方会处理) + // 为未登录的进程生成临时账号名称 + info.AccountName = fmt.Sprintf("未登录微信_%d", p.Pid) return nil } diff --git a/internal/wechat/wechat.go b/internal/wechat/wechat.go index b509dab..2ad1fb5 100644 --- a/internal/wechat/wechat.go +++ b/internal/wechat/wechat.go @@ -2,8 +2,11 @@ package wechat import ( "context" + "fmt" "os" + "strings" + "github.com/rs/zerolog/log" "github.com/sjzar/chatlog/internal/errors" "github.com/sjzar/chatlog/internal/wechat/decrypt" "github.com/sjzar/chatlog/internal/wechat/key" @@ -43,26 +46,168 @@ func (a *Account) RefreshStatus() error { // 查找所有微信进程 Load() + // 首先尝试通过名称查找 process, err := GetProcess(a.Name) if err != nil { - a.Status = model.StatusOffline - return nil + // 如果通过名称找不到,尝试通过PID查找 + if a.PID != 0 { + // 获取所有进程 + processes, err := GetAllProcesses() + if err != nil { + a.Status = model.StatusOffline + return nil + } + + // 通过PID查找 + var foundByPID bool + for _, p := range processes { + if p.PID == a.PID { + process = p + foundByPID = true + break + } + } + + if !foundByPID { + // 微信可能重启了,原来的PID找不到进程 + // 尝试查找其他微信进程 + if len(processes) > 0 { + // 选择第一个微信进程(假设只有一个微信实例) + process = processes[0] + + // 保存旧的PID用于日志 + oldPID := a.PID + + // 重置账号状态为未登录状态 + a.PID = process.PID + a.ExePath = process.ExePath + a.Platform = process.Platform + a.Version = process.Version + a.FullVersion = process.FullVersion + a.Status = process.Status + a.DataDir = process.DataDir + + // 更新临时账户名称(跟随PID变化) + oldName := a.Name + a.Name = fmt.Sprintf("未登录微信_%d", process.PID) + + // 如果名称变化,记录日志 + if oldName != a.Name { + log.Info().Msgf("临时账户名称从 '%s' 更新为 '%s'", oldName, a.Name) + } + + log.Info().Msgf("微信可能已重启,PID从 %d 变为 %d,账号重置为未登录状态", oldPID, process.PID) + return nil + } else { + // 没有找到任何微信进程 - 微信可能已退出 + a.clearAccountData() + log.Info().Msg("微信进程未找到,可能已退出,已清除账号数据") + return nil + } + } + } else { + // PID为0,尝试查找所有微信进程 + processes, err := GetAllProcesses() + if err != nil { + a.Status = model.StatusOffline + return nil + } + + if len(processes) > 0 { + // 找到微信进程,更新账号信息 + process = processes[0] + + // 更新进程信息 + a.PID = process.PID + a.ExePath = process.ExePath + a.Platform = process.Platform + a.Version = process.Version + a.FullVersion = process.FullVersion + a.Status = process.Status + a.DataDir = process.DataDir + + // 更新临时账户名称(跟随PID变化) + oldName := a.Name + a.Name = fmt.Sprintf("未登录微信_%d", process.PID) + + // 如果名称变化,记录日志 + if oldName != a.Name { + log.Info().Msgf("临时账户名称从 '%s' 更新为 '%s'", oldName, a.Name) + } + + log.Info().Msgf("微信已重新启动,PID: %d,账号重置为未登录状态", process.PID) + return nil + } else { + // 没有找到任何微信进程 + a.Status = model.StatusOffline + return nil + } + } } - if process.AccountName == a.Name { - // 更新进程信息 - a.PID = process.PID - a.ExePath = process.ExePath - a.Platform = process.Platform - a.Version = process.Version - a.FullVersion = process.FullVersion - a.Status = process.Status - a.DataDir = process.DataDir + // 检查PID是否变化(微信可能重启了) + if a.PID != 0 && a.PID != process.PID { + log.Info().Msgf("微信PID变化:从 %d 变为 %d,可能已重启", a.PID, process.PID) + } + + // 更新进程信息 + a.PID = process.PID + a.ExePath = process.ExePath + a.Platform = process.Platform + a.Version = process.Version + a.FullVersion = process.FullVersion + a.Status = process.Status + a.DataDir = process.DataDir + + // 如果账号名称是临时名称,但进程有真实的账号名称,更新账号名称 + if strings.HasPrefix(a.Name, "未登录微信_") && process.AccountName != "" && !strings.HasPrefix(process.AccountName, "未登录微信_") { + a.Name = process.AccountName + } else if strings.HasPrefix(a.Name, "未登录微信_") && (process.AccountName == "" || strings.HasPrefix(process.AccountName, "未登录微信_")) { + // 账号名称是临时名称,但进程没有真实名称(或也是临时名称) + // 检查PID是否变化,如果变化则更新临时名称 + oldName := a.Name + // 从旧名称中提取旧的PID + oldPIDStr := strings.TrimPrefix(oldName, "未登录微信_") + var oldPID uint32 + fmt.Sscanf(oldPIDStr, "%d", &oldPID) + + // 如果PID变化,更新临时名称 + if oldPID != process.PID { + a.Name = fmt.Sprintf("未登录微信_%d", process.PID) + log.Info().Msgf("临时账户PID变化,名称从 '%s' 更新为 '%s'", oldName, a.Name) + } } return nil } +// clearAccountData 清除账号数据(当微信退出时调用) +func (a *Account) clearAccountData() { + // 保存旧的名称用于日志 + oldName := a.Name + + // 清除密钥数据 + a.Key = "" + a.ImgKey = "" + + // 清除路径信息 + a.DataDir = "" + + // 重置状态 + a.Status = model.StatusOffline + + // 重置PID + a.PID = 0 + + // 重置账号名称为临时名称(如果还有PID的话) + // 如果没有PID,保持原有名称或设置为空 + if a.PID == 0 { + // 如果没有PID,无法生成临时名称,保持原有名称 + // 但可以标记为已退出 + log.Info().Msgf("账号 '%s' 的微信已退出,已清除相关数据", oldName) + } +} + // GetKey 获取账号的密钥 func (a *Account) GetKey(ctx context.Context) (string, string, error) { // 如果已经有密钥,直接返回 @@ -75,10 +220,9 @@ func (a *Account) GetKey(ctx context.Context) (string, string, error) { return "", "", errors.RefreshProcessStatusFailed(err) } - // 检查账号状态 - if a.Status != model.StatusOnline { - return "", "", errors.WeChatAccountNotOnline(a.Name) - } + // 注意:不再检查账号状态是否为online + // 因为DLL提取器支持在未登录状态下工作 + // 用户可以在获取密钥过程中登录微信 // 创建密钥提取器 - 使用新的接口,传入平台和版本信息 extractor, err := key.NewExtractor(a.Platform, a.Version) @@ -91,12 +235,22 @@ func (a *Account) GetKey(ctx context.Context) (string, string, error) { return "", "", err } - validator, err := decrypt.NewValidator(process.Platform, process.Version, process.DataDir) - if err != nil { - return "", "", err + // 只有在DataDir存在时才创建验证器 + // 对于DLL方式,微信可能未登录,DataDir可能为空或路径不存在 + var validator *decrypt.Validator + if process.DataDir != "" { + validator, err = decrypt.NewValidator(process.Platform, process.Version, process.DataDir) + if err != nil { + // 如果创建验证器失败,记录警告但不返回错误 + // 因为DLL方式可以不依赖验证器 + log.Warn().Err(err).Msg("创建验证器失败,将继续尝试获取密钥(DLL方式可能不需要验证器)") + validator = nil + } } - extractor.SetValidate(validator) + if validator != nil { + extractor.SetValidate(validator) + } // 提取密钥 dataKey, imgKey, err := extractor.Extract(ctx, process) diff --git a/lib/windows_x64/wx_key.exp b/lib/windows_x64/wx_key.exp new file mode 100644 index 0000000..5591595 Binary files /dev/null and b/lib/windows_x64/wx_key.exp differ diff --git a/lib/windows_x64/wx_key.lib b/lib/windows_x64/wx_key.lib new file mode 100644 index 0000000..246292d Binary files /dev/null and b/lib/windows_x64/wx_key.lib differ diff --git a/lib/windows_x64/wx_key.pdb b/lib/windows_x64/wx_key.pdb new file mode 100644 index 0000000..c7bf5c2 Binary files /dev/null and b/lib/windows_x64/wx_key.pdb differ diff --git a/pkg/util/dll_logger.go b/pkg/util/dll_logger.go new file mode 100644 index 0000000..adfa4f1 --- /dev/null +++ b/pkg/util/dll_logger.go @@ -0,0 +1,171 @@ +package util + +import ( + "fmt" + "os" + "path/filepath" + "sync" + "time" +) + +// DLLLogger DLL日志记录器 +type DLLLogger struct { + mu sync.Mutex + logFile *os.File + logPath string + enabled bool +} + +var ( + dllLogger *DLLLogger + dllLoggerOnce sync.Once +) + +// GetDLLLogger 获取DLL日志记录器单例 +func GetDLLLogger() *DLLLogger { + dllLoggerOnce.Do(func() { + dllLogger = &DLLLogger{ + enabled: true, + } + // 尝试初始化日志文件 + dllLogger.initLogFile() + }) + return dllLogger +} + +// initLogFile 初始化日志文件 +func (l *DLLLogger) initLogFile() { + if !l.enabled { + return + } + + // 获取当前工作目录 + workDir, err := os.Getwd() + if err != nil { + // 如果获取失败,使用默认工作目录 + workDir = DefaultWorkDir("") + } + + // 创建日志目录 + logDir := filepath.Join(workDir, "logs") + if err := os.MkdirAll(logDir, 0755); err != nil { + // 如果创建目录失败,禁用日志 + l.enabled = false + return + } + + // 生成日志文件名:dll_YYYYMMDD_HHMMSS.log + timestamp := time.Now().Format("20060102_150405") + logFileName := fmt.Sprintf("dll_%s.log", timestamp) + l.logPath = filepath.Join(logDir, logFileName) + + // 创建日志文件 + file, err := os.OpenFile(l.logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + l.enabled = false + return + } + + l.logFile = file +} + +// LogError 记录错误信息 +func (l *DLLLogger) LogError(message string) { + l.log("ERROR", message) +} + +// LogStatus 记录状态信息 +func (l *DLLLogger) LogStatus(level int, message string) { + var levelStr string + switch level { + case 0: + levelStr = "INFO" + case 1: + levelStr = "SUCCESS" + case 2: + levelStr = "ERROR" + default: + levelStr = "UNKNOWN" + } + l.log(levelStr, message) +} + +// LogInfo 记录普通信息 +func (l *DLLLogger) LogInfo(message string) { + l.log("INFO", message) +} + +// LogDebug 记录调试信息 +func (l *DLLLogger) LogDebug(message string) { + l.log("DEBUG", message) +} + +// LogWarning 记录警告信息 +func (l *DLLLogger) LogWarning(message string) { + l.log("WARNING", message) +} + +// log 内部日志记录函数 +func (l *DLLLogger) log(level, message string) { + if !l.enabled || l.logFile == nil { + return + } + + l.mu.Lock() + defer l.mu.Unlock() + + timestamp := time.Now().Format("2006-01-02 15:04:05.000") + logEntry := fmt.Sprintf("[%s] [%s] %s\n", timestamp, level, message) + + if _, err := l.logFile.WriteString(logEntry); err != nil { + // 写入失败,关闭文件并禁用日志 + l.logFile.Close() + l.logFile = nil + l.enabled = false + } +} + +// GetLogPath 获取日志文件路径 +func (l *DLLLogger) GetLogPath() string { + return l.logPath +} + +// IsEnabled 检查日志是否启用 +func (l *DLLLogger) IsEnabled() bool { + return l.enabled +} + +// Close 关闭日志文件 +func (l *DLLLogger) Close() { + l.mu.Lock() + defer l.mu.Unlock() + + if l.logFile != nil { + l.logFile.Close() + l.logFile = nil + } + l.enabled = false +} + +// LogInitialization 记录DLL初始化信息 +func (l *DLLLogger) LogInitialization(pid uint32, success bool, errorMsg string) { + if success { + l.LogStatus(1, fmt.Sprintf("DLL初始化成功,PID: %d", pid)) + } else { + l.LogError(fmt.Sprintf("DLL初始化失败,PID: %d, 错误: %s", pid, errorMsg)) + } +} + +// LogPolling 记录轮询信息 +func (l *DLLLogger) LogPolling(keyFound bool, key string, keyType string) { + if keyFound { + l.LogStatus(1, fmt.Sprintf("找到%s密钥: %s", keyType, key)) + } else { + l.LogStatus(0, "轮询中...") + } +} + +// LogCleanup 记录清理信息 +func (l *DLLLogger) LogCleanup() { + l.LogStatus(0, "DLL资源已清理") +} \ No newline at end of file diff --git a/wx_key/dllmain.cpp b/wx_key/dllmain.cpp new file mode 100644 index 0000000..71524d3 --- /dev/null +++ b/wx_key/dllmain.cpp @@ -0,0 +1,24 @@ +// 远程Hook控制器DLL入口点 +// 这个DLL在Flutter进程中运行,不会被注入到目标进程 +#include +#define HOOK_EXPORTS + +#include "include/hook_controller.h" + +#pragma comment(lib, "Psapi.lib") +#pragma comment(lib, "version.lib") + +// DLL 入口函数 +BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved) { + switch (ul_reason_for_call) { + case DLL_PROCESS_ATTACH: + DisableThreadLibraryCalls(hModule); + break; + + case DLL_PROCESS_DETACH: + // 清理资源 + CleanupHook(); + break; + } + return TRUE; +} \ No newline at end of file diff --git a/wx_key/framework.h b/wx_key/framework.h new file mode 100644 index 0000000..80cbbc9 --- /dev/null +++ b/wx_key/framework.h @@ -0,0 +1,5 @@ +#pragma once + +#define WIN32_LEAN_AND_MEAN // 从 Windows 头文件中排除极少使用的内容 +// Windows 头文件 +#include diff --git a/wx_key/include/hook_controller.h b/wx_key/include/hook_controller.h new file mode 100644 index 0000000..13f2943 --- /dev/null +++ b/wx_key/include/hook_controller.h @@ -0,0 +1,49 @@ +#ifndef HOOK_CONTROLLER_H +#define HOOK_CONTROLLER_H + +#include + +#ifdef HOOK_EXPORTS +#define HOOK_API extern "C" __declspec(dllexport) +#else +#define HOOK_API extern "C" __declspec(dllimport) +#endif + +/** + * 初始化并安装Hook(轮询模式 - 无回调) + * @param targetPid 微信进程的PID + * @return 成功返回true,失败返回false + */ +HOOK_API bool InitializeHook(DWORD targetPid); + +/** + * 轮询检查是否有新的密钥数据(非阻塞) + * @param keyBuffer 输出缓冲区,用于接收密钥十六进制字符串(至少65字节) + * @param bufferSize keyBuffer的大小 + * @return 如果有新数据返回true,否则返回false + */ +HOOK_API bool PollKeyData(char* keyBuffer, int bufferSize); + +/** + * 获取当前状态消息 + * @param statusBuffer 输出缓冲区,用于接收状态消息(至少256字节) + * @param bufferSize statusBuffer的大小 + * @param outLevel 输出状态级别 (0=info, 1=success, 2=error) + * @return 如果有新状态返回true,否则返回false + */ +HOOK_API bool GetStatusMessage(char* statusBuffer, int bufferSize, int* outLevel); + +/** + * 清理并卸载Hook + * @return 成功返回true,失败返回false + */ +HOOK_API bool CleanupHook(); + +/** + * 获取最后一次错误信息 + * @return 错误信息字符串 + */ +HOOK_API const char* GetLastErrorMsg(); + +#endif // HOOK_CONTROLLER_H + diff --git a/wx_key/include/ipc_manager.h b/wx_key/include/ipc_manager.h new file mode 100644 index 0000000..d98a508 --- /dev/null +++ b/wx_key/include/ipc_manager.h @@ -0,0 +1,79 @@ +#ifndef IPC_MANAGER_H +#define IPC_MANAGER_H + +#include +#include +#include +#include + +// 共享内存数据结构 +#pragma pack(push, 1) +struct SharedKeyData { + DWORD dataSize; // 数据大小 + BYTE keyBuffer[32]; // 密钥数据(最大32字节) + DWORD sequenceNumber; // 序列号 +}; +#pragma pack(pop) + +// IPC管理器类(轮询模式 - 用于远程进程缓冲区读取) +class IPCManager { +public: + IPCManager(); + ~IPCManager(); + + // 初始化IPC(控制器端 - 轮询模式) + bool Initialize(const std::string& uniqueId); + + // 设置远程缓冲区地址(目标进程中的地址) + void SetRemoteBuffer(HANDLE hProcess, PVOID remoteBufferAddr); + + // 清理资源 + void Cleanup(); + + // 设置数据接收回调 + void SetDataCallback(std::function callback); + + // 启动监听线程(轮询远程缓冲区) + bool StartListening(); + + // 停止监听线程 + void StopListening(); + + // 获取共享内存地址(用于传递给Shellcode) + PVOID GetSharedMemoryAddress() const; + + // 获取事件句柄(用于传递给Shellcode) + HANDLE GetEventHandle() const; + + // 获取共享内存名称 + std::string GetSharedMemoryName() const { return sharedMemoryName; } + + // 获取事件名称 + std::string GetEventName() const { return eventName; } + +private: + std::string uniqueId; + std::string sharedMemoryName; + std::string eventName; + + HANDLE hMapFile; + HANDLE hEvent; + PVOID pSharedMemory; + + // 远程进程轮询相关 + HANDLE hTargetProcess; + PVOID pRemoteBuffer; + DWORD lastSequenceNumber; + + HANDLE hListeningThread; + std::atomic shouldStopListening; + + std::function dataCallback; + + // 监听线程函数(轮询模式) + static DWORD WINAPI ListeningThreadProc(LPVOID lpParam); + void ListeningLoop(); +}; + +#endif // IPC_MANAGER_H + diff --git a/wx_key/include/remote_hooker.h b/wx_key/include/remote_hooker.h new file mode 100644 index 0000000..5762bf3 --- /dev/null +++ b/wx_key/include/remote_hooker.h @@ -0,0 +1,72 @@ +#ifndef REMOTE_HOOKER_H +#define REMOTE_HOOKER_H + +#include +#include +#include "shellcode_builder.h" +#include "remote_memory.h" + +// 远程Hook管理器 +class RemoteHooker { +public: + RemoteHooker(HANDLE hProcess); + ~RemoteHooker(); + void EnableHardwareBreakpointMode(bool enabled) { useHardwareBreakpoint = enabled; } + + /** + * 安装远程Hook + * @param targetFunctionAddress 目标函数地址 + * @param shellcodeConfig Shellcode配置 + * @return 成功返回true + */ + bool InstallHook(uintptr_t targetFunctionAddress, const ShellcodeConfig& shellcodeConfig); + + /** + * 卸载Hook + * @return 成功返回true + */ + bool UninstallHook(); + + /** + * 获取Trampoline地址(用于Shellcode配置) + * @return Trampoline地址 + */ + uintptr_t GetTrampolineAddress() const { return trampolineAddress; } + + /** + * 获取远程Shellcode地址(供VEH模式使用) + */ + uintptr_t GetRemoteShellcodeAddress() const { return remoteShellcodeAddress; } + +private: + HANDLE hProcess; + + // Hook相关状态 + uintptr_t targetAddress; + uintptr_t remoteShellcodeAddress; + uintptr_t trampolineAddress; + std::vector originalBytes; + bool isHookInstalled; + bool useHardwareBreakpoint{false}; + RemoteMemory trampolineMemory; + RemoteMemory shellcodeMemory; + + // 在远程进程分配内存 + PVOID RemoteAllocate(SIZE_T size, DWORD protect); + + // 读取/写入/保护 + bool RemoteWrite(PVOID address, const void* data, SIZE_T size); + bool RemoteRead(PVOID address, void* buffer, SIZE_T size); + bool RemoteProtect(PVOID address, SIZE_T size, DWORD newProtect, DWORD* oldProtect); + + // 创建Trampoline(保存原始指令) + bool CreateTrampoline(uintptr_t targetAddress); + + // 计算需要备份的指令长度 + size_t CalculateHookLength(const BYTE* code); + + // 生成跳转指令(5字节短跳转或14字节长跳转) + std::vector GenerateJumpInstruction(uintptr_t from, uintptr_t to); +}; + +#endif // REMOTE_HOOKER_H diff --git a/wx_key/include/remote_memory.h b/wx_key/include/remote_memory.h new file mode 100644 index 0000000..b726f98 --- /dev/null +++ b/wx_key/include/remote_memory.h @@ -0,0 +1,106 @@ +#ifndef REMOTE_MEMORY_H +#define REMOTE_MEMORY_H + +#include +#include "syscalls.h" + +// RAII wrapper for remote allocations using NtAllocateVirtualMemory/NtFreeVirtualMemory +class RemoteMemory { +public: + RemoteMemory() = default; + RemoteMemory(HANDLE process, SIZE_T size, ULONG protect) { + allocate(process, size, protect); + } + + RemoteMemory(const RemoteMemory&) = delete; + RemoteMemory& operator=(const RemoteMemory&) = delete; + + RemoteMemory(RemoteMemory&& other) noexcept { + moveFrom(std::move(other)); + } + + RemoteMemory& operator=(RemoteMemory&& other) noexcept { + if (this != &other) { + reset(); + moveFrom(std::move(other)); + } + return *this; + } + + ~RemoteMemory() { + reset(); + } + + bool allocate(HANDLE process, SIZE_T size, ULONG protect) { + reset(); + hProcess = process; + sizeBytes = size; + base = nullptr; + SIZE_T regionSize = size; + NTSTATUS status = IndirectSyscalls::NtAllocateVirtualMemory( + hProcess, + &base, + 0, + ®ionSize, + MEM_COMMIT | MEM_RESERVE, + protect + ); + if (status != STATUS_SUCCESS) { + base = nullptr; + sizeBytes = 0; + return false; + } + return true; + } + + void reset() { + if (base) { + SIZE_T regionSize = 0; + PVOID addr = base; + IndirectSyscalls::NtFreeVirtualMemory(hProcess, &addr, ®ionSize, MEM_RELEASE); + base = nullptr; + sizeBytes = 0; + hProcess = nullptr; + } + } + + bool protect(ULONG newProtect, ULONG* oldProtect = nullptr) { + if (!base || sizeBytes == 0) { + return false; + } + PVOID addr = base; + SIZE_T regionSize = sizeBytes; + ULONG oldProt = 0; + NTSTATUS status = IndirectSyscalls::NtProtectVirtualMemory( + hProcess, + &addr, + ®ionSize, + newProtect, + &oldProt + ); + if (oldProtect) { + *oldProtect = oldProt; + } + return status == STATUS_SUCCESS; + } + + PVOID get() const { return base; } + SIZE_T size() const { return sizeBytes; } + bool valid() const { return base != nullptr; } + +private: + void moveFrom(RemoteMemory&& other) { + hProcess = other.hProcess; + base = other.base; + sizeBytes = other.sizeBytes; + other.hProcess = nullptr; + other.base = nullptr; + other.sizeBytes = 0; + } + + HANDLE hProcess{ nullptr }; + PVOID base{ nullptr }; + SIZE_T sizeBytes{ 0 }; +}; + +#endif // REMOTE_MEMORY_H diff --git a/wx_key/include/remote_scanner.h b/wx_key/include/remote_scanner.h new file mode 100644 index 0000000..6a6ce75 --- /dev/null +++ b/wx_key/include/remote_scanner.h @@ -0,0 +1,69 @@ +#ifndef REMOTE_SCANNER_H +#define REMOTE_SCANNER_H + +#include +#include +#include + +// 远程进程信息 +struct RemoteModuleInfo { + HMODULE baseAddress; + SIZE_T imageSize; + std::string moduleName; +}; + +// 远程特征码扫描器 +class RemoteScanner { +public: + RemoteScanner(HANDLE hProcess); + ~RemoteScanner(); + + // 获取远程进程的模块信息 + bool GetRemoteModuleInfo(const std::string& moduleName, RemoteModuleInfo& outInfo); + + // 在远程进程中查找特征码(单个结果) + uintptr_t FindPattern(const RemoteModuleInfo& moduleInfo, const BYTE* pattern, const char* mask); + + // 在远程进程中查找特征码(所有结果) + std::vector FindAllPatterns(const RemoteModuleInfo& moduleInfo, const BYTE* pattern, const char* mask); + + // 读取远程内存 + bool ReadRemoteMemory(uintptr_t address, void* buffer, SIZE_T size); + + // 获取微信版本号 + std::string GetWeChatVersion(); + +private: + HANDLE hProcess; + + // 本地缓冲区,用于批量读取远程内存 + std::vector scanBuffer; + + // 内存匹配辅助函数 + bool MatchPattern(const BYTE* data, const BYTE* pattern, const char* mask, size_t length); +}; + +// 微信版本配置 +struct WeChatVersionConfig { + std::string version; // 版本号 + std::vector pattern; // 特征码 + std::string mask; // 掩码 + int offset; // 偏移量 + + WeChatVersionConfig(const std::string& ver, const std::vector& pat, const std::string& msk, int off) + : version(ver), pattern(pat), mask(msk), offset(off) {} +}; + +// 版本配置管理器 +class VersionConfigManager { +public: + static void InitializeConfigs(); + static const WeChatVersionConfig* GetConfigForVersion(const std::string& version); + +private: + static std::vector configs; + static bool initialized; +}; + +#endif // REMOTE_SCANNER_H + diff --git a/wx_key/include/remote_veh.h b/wx_key/include/remote_veh.h new file mode 100644 index 0000000..418a37c --- /dev/null +++ b/wx_key/include/remote_veh.h @@ -0,0 +1,28 @@ +#ifndef REMOTE_VEH_H +#define REMOTE_VEH_H + +#include +#include +#include "remote_memory.h" + +struct RemoteVehConfig { + HANDLE hProcess; + uintptr_t targetAddress; + uintptr_t shellcodeAddress; +}; + +// 工具:在远程进程注册VEH,并为所有线程设置硬件断点 +// 返回句柄和用于卸载的必要信息 +struct RemoteVehHandle { + PVOID vehHandle; + PVOID handlerCode; + PVOID dataBlock; + uintptr_t unregStubAddress; + RemoteMemory remoteMemory; + bool installed; +}; + +RemoteVehHandle InstallRemoteVeh(const RemoteVehConfig& cfg); +void UninstallRemoteVeh(const RemoteVehConfig& cfg, RemoteVehHandle& handle); + +#endif // REMOTE_VEH_H diff --git a/wx_key/include/shellcode_builder.h b/wx_key/include/shellcode_builder.h new file mode 100644 index 0000000..e195ac7 --- /dev/null +++ b/wx_key/include/shellcode_builder.h @@ -0,0 +1,37 @@ +#ifndef SHELLCODE_BUILDER_H +#define SHELLCODE_BUILDER_H + +#include +#include +#include + +// Shellcode配置 +struct ShellcodeConfig { + PVOID sharedMemoryAddress; // 共享内存地址 + HANDLE eventHandle; // 事件句柄 + uintptr_t trampolineAddress; // Trampoline地址(原始函数继续执行的地址) + bool enableStackSpoofing{false}; // 是否启用堆栈伪造 + uintptr_t spoofStackPointer{0}; // 伪造栈指针(指向伪栈顶) +}; + +// Shellcode构建器 +class ShellcodeBuilder { +public: + ShellcodeBuilder(); + ~ShellcodeBuilder(); + + // 构建Hook Shellcode + std::vector BuildHookShellcode(const ShellcodeConfig& config); + + // 获取Shellcode大小 + size_t GetShellcodeSize() const; + +private: + std::vector shellcode; + + // 清除Shellcode + void Clear(); +}; + +#endif // SHELLCODE_BUILDER_H + diff --git a/wx_key/include/string_obfuscator.h b/wx_key/include/string_obfuscator.h new file mode 100644 index 0000000..898d4d5 --- /dev/null +++ b/wx_key/include/string_obfuscator.h @@ -0,0 +1,61 @@ + #ifndef STRING_OBFUSCATOR_H + #define STRING_OBFUSCATOR_H + + #include + #include + + // 编译时字符串加密 - XOR加密 + template + class ObfuscatedString { + private: + std::array data; + + constexpr char decrypt_char(char c, size_t i) const { + return c ^ static_cast(0xAA + (i % 256)); + } + + public: + template + constexpr ObfuscatedString(const char* str, std::index_sequence) + : data{ {static_cast(str[I] ^ static_cast(0xAA + (I % 256)))...} } {} + + std::string decrypt() const { + std::string result; + if (N > 0) { + result.reserve(N - 1); + for (size_t i = 0; i < N - 1; ++i) { + result.push_back(decrypt_char(data[i], i)); + } + } + return result; + } + }; + + // 辅助宏定义 + #define OBFUSCATE_STR(str) \ + ([]() { \ + constexpr auto size = sizeof(str); \ + return ObfuscatedString(str, std::make_index_sequence{}); \ + }().decrypt()) + + // 常用字符串的加密版本 + namespace ObfuscatedStrings { + inline std::string GetSharedMemoryName() { + return OBFUSCATE_STR("Global\\WxKeySharedMemory_{GUID}"); + } + + inline std::string GetEventName() { + return OBFUSCATE_STR("Global\\WxKeyEvent_{GUID}"); + } + + inline std::string GetWeixinDllName() { + return OBFUSCATE_STR("Weixin.dll"); + } + + inline std::string GetNtdllName() { + return OBFUSCATE_STR("ntdll.dll"); + } + } + + #endif // STRING_OBFUSCATOR_H + diff --git a/wx_key/include/syscalls.h b/wx_key/include/syscalls.h new file mode 100644 index 0000000..b401811 --- /dev/null +++ b/wx_key/include/syscalls.h @@ -0,0 +1,188 @@ +#ifndef SYSCALLS_H +#define SYSCALLS_H + +#include +#include +#include + +// Extended NT definitions +#ifndef STATUS_SUCCESS +#define STATUS_SUCCESS ((NTSTATUS)0x00000000L) +#endif + +#ifndef STATUS_UNSUCCESSFUL +#define STATUS_UNSUCCESSFUL ((NTSTATUS)0xC0000001L) +#endif + +// CLIENT_ID structure (if not defined) +#ifndef _CLIENT_ID_DEFINED +#define _CLIENT_ID_DEFINED +typedef struct _MY_CLIENT_ID { + PVOID UniqueProcess; + PVOID UniqueThread; +} MY_CLIENT_ID, *PMY_CLIENT_ID; +#endif + +// OBJECT_ATTRIBUTES structure (simplified) +#ifndef _MY_OBJECT_ATTRIBUTES_DEFINED +#define _MY_OBJECT_ATTRIBUTES_DEFINED +typedef struct _MY_OBJECT_ATTRIBUTES { + ULONG Length; + HANDLE RootDirectory; + PVOID ObjectName; + ULONG Attributes; + PVOID SecurityDescriptor; + PVOID SecurityQualityOfService; +} MY_OBJECT_ATTRIBUTES, *PMY_OBJECT_ATTRIBUTES; +#endif + +// NT函数原型 +typedef NTSTATUS(NTAPI* pNtOpenProcess)( + PHANDLE ProcessHandle, + ACCESS_MASK DesiredAccess, + PMY_OBJECT_ATTRIBUTES ObjectAttributes, + PMY_CLIENT_ID ClientId +); + +typedef NTSTATUS(NTAPI* pNtReadVirtualMemory)( + HANDLE ProcessHandle, + PVOID BaseAddress, + PVOID Buffer, + SIZE_T BufferSize, + PSIZE_T NumberOfBytesRead +); + +typedef NTSTATUS(NTAPI* pNtWriteVirtualMemory)( + HANDLE ProcessHandle, + PVOID BaseAddress, + PVOID Buffer, + SIZE_T BufferSize, + PSIZE_T NumberOfBytesWritten +); + +typedef NTSTATUS(NTAPI* pNtAllocateVirtualMemory)( + HANDLE ProcessHandle, + PVOID* BaseAddress, + ULONG_PTR ZeroBits, + PSIZE_T RegionSize, + ULONG AllocationType, + ULONG Protect +); + +typedef NTSTATUS(NTAPI* pNtFreeVirtualMemory)( + HANDLE ProcessHandle, + PVOID* BaseAddress, + PSIZE_T RegionSize, + ULONG FreeType +); + +typedef NTSTATUS(NTAPI* pNtProtectVirtualMemory)( + HANDLE ProcessHandle, + PVOID* BaseAddress, + PSIZE_T RegionSize, + ULONG NewProtect, + PULONG OldProtect +); + +typedef NTSTATUS(NTAPI* pNtQueryInformationProcess)( + HANDLE ProcessHandle, + PROCESSINFOCLASS ProcessInformationClass, + PVOID ProcessInformation, + ULONG ProcessInformationLength, + PULONG ReturnLength +); + +// 间接系统调用类 +class IndirectSyscalls { +public: + static bool Initialize(); + static void Cleanup(); + + // 封装的系统调用函数 + static NTSTATUS NtOpenProcess( + PHANDLE ProcessHandle, + ACCESS_MASK DesiredAccess, + PMY_OBJECT_ATTRIBUTES ObjectAttributes, + PMY_CLIENT_ID ClientId + ); + + static NTSTATUS NtReadVirtualMemory( + HANDLE ProcessHandle, + PVOID BaseAddress, + PVOID Buffer, + SIZE_T BufferSize, + PSIZE_T NumberOfBytesRead + ); + + static NTSTATUS NtWriteVirtualMemory( + HANDLE ProcessHandle, + PVOID BaseAddress, + PVOID Buffer, + SIZE_T BufferSize, + PSIZE_T NumberOfBytesWritten + ); + + static NTSTATUS NtAllocateVirtualMemory( + HANDLE ProcessHandle, + PVOID* BaseAddress, + ULONG_PTR ZeroBits, + PSIZE_T RegionSize, + ULONG AllocationType, + ULONG Protect + ); + + static NTSTATUS NtFreeVirtualMemory( + HANDLE ProcessHandle, + PVOID* BaseAddress, + PSIZE_T RegionSize, + ULONG FreeType + ); + + static NTSTATUS NtProtectVirtualMemory( + HANDLE ProcessHandle, + PVOID* BaseAddress, + PSIZE_T RegionSize, + ULONG NewProtect, + PULONG OldProtect + ); + + static NTSTATUS NtQueryInformationProcess( + HANDLE ProcessHandle, + PROCESSINFOCLASS ProcessInformationClass, + PVOID ProcessInformation, + ULONG ProcessInformationLength, + PULONG ReturnLength + ); + +private: + static bool initialized; + + // 动态获取的函数指针 + static pNtOpenProcess fnNtOpenProcess; + static pNtReadVirtualMemory fnNtReadVirtualMemory; + static pNtWriteVirtualMemory fnNtWriteVirtualMemory; + static pNtAllocateVirtualMemory fnNtAllocateVirtualMemory; + static pNtFreeVirtualMemory fnNtFreeVirtualMemory; + static pNtProtectVirtualMemory fnNtProtectVirtualMemory; + static pNtQueryInformationProcess fnNtQueryInformationProcess; + + // 直接SSN调用的stub指针 + static pNtOpenProcess scNtOpenProcess; + static pNtReadVirtualMemory scNtReadVirtualMemory; + static pNtWriteVirtualMemory scNtWriteVirtualMemory; + static pNtAllocateVirtualMemory scNtAllocateVirtualMemory; + static pNtFreeVirtualMemory scNtFreeVirtualMemory; + static pNtProtectVirtualMemory scNtProtectVirtualMemory; + static pNtQueryInformationProcess scNtQueryInformationProcess; + + // 辅助函数:从ntdll解析函数地址 + template + static bool ResolveFunction(const char* functionName, T& functionPointer); + + // 辅助:从ntdll stub提取SSN并构建直调stub + static uint32_t ExtractSyscallNumber(void* fnAddress); + static void* CreateSyscallStub(uint32_t ssn); +}; + +#endif // SYSCALLS_H + diff --git a/wx_key/include/veh_hook_manager.h b/wx_key/include/veh_hook_manager.h new file mode 100644 index 0000000..3f62abd --- /dev/null +++ b/wx_key/include/veh_hook_manager.h @@ -0,0 +1,32 @@ +#ifndef VEH_HOOK_MANAGER_H +#define VEH_HOOK_MANAGER_H + +#include +#include +#include +#include + +// 硬件断点 + VEH 管理(当前进程,多线程遍历) +class VehHookManager { +public: + VehHookManager(); + ~VehHookManager(); + + // 安装硬件断点并注册VEH + bool Install(uintptr_t address, std::function callback); + // 卸载 + void Uninstall(); + +private: + static LONG CALLBACK VectoredHandler(EXCEPTION_POINTERS* info); + bool SetHardwareBreakpoint(uintptr_t address); + void ClearHardwareBreakpoint(); + + uintptr_t targetAddress; + void* vehHandle; + std::function userCallback; + std::unordered_map originalContexts; + bool installed; +}; + +#endif // VEH_HOOK_MANAGER_H diff --git a/wx_key/pch.cpp b/wx_key/pch.cpp new file mode 100644 index 0000000..b6fb8f4 --- /dev/null +++ b/wx_key/pch.cpp @@ -0,0 +1,5 @@ +// pch.cpp: 与预编译标头对应的源文件 + +#include "pch.h" + +// 当使用预编译的头时,需要使用此源文件,编译才能成功。 diff --git a/wx_key/pch.h b/wx_key/pch.h new file mode 100644 index 0000000..9660927 --- /dev/null +++ b/wx_key/pch.h @@ -0,0 +1,13 @@ +// pch.h: 这是预编译标头文件。 +// 下方列出的文件仅编译一次,提高了将来生成的生成性能。 +// 这还将影响 IntelliSense 性能,包括代码完成和许多代码浏览功能。 +// 但是,如果此处列出的文件中的任何一个在生成之间有更新,它们全部都将被重新编译。 +// 请勿在此处添加要频繁更新的文件,这将使得性能优势无效。 + +#ifndef PCH_H +#define PCH_H + +// 添加要在此处预编译的标头 +#include "framework.h" + +#endif //PCH_H diff --git a/wx_key/src/hook_controller.cpp b/wx_key/src/hook_controller.cpp new file mode 100644 index 0000000..6cd414b --- /dev/null +++ b/wx_key/src/hook_controller.cpp @@ -0,0 +1,490 @@ +#define HOOK_EXPORTS + +#include +#include +#include +#include +#include +#include +#include + +#include "../include/hook_controller.h" +#include "../include/syscalls.h" +#include "../include/remote_scanner.h" +#include "../include/ipc_manager.h" +#include "../include/remote_hooker.h" +#include "../include/shellcode_builder.h" +#include "../include/string_obfuscator.h" +#include "../include/remote_veh.h" +#include "../include/remote_memory.h" + +#pragma execution_character_set("utf-8") + +// 全局状态 +namespace { + bool InitializeContext(DWORD targetPid); + void CleanupContext(); + struct StatusMessage { + std::string message; + int level; + }; + + struct HookContext { + HANDLE hProcess{ nullptr }; + std::unique_ptr ipc; + std::unique_ptr hooker; + RemoteMemory remoteData; + RemoteMemory spoofStack; + CRITICAL_SECTION dataLock{}; + bool csInitialized{ false }; + std::string pendingKeyData; + bool hasNewKey{ false }; + std::vector statusQueue; + bool initialized{ false }; + + void InitLock() { + if (!csInitialized) { + InitializeCriticalSection(&dataLock); + csInitialized = true; + } + } + + void FreeLock() { + if (csInitialized) { + DeleteCriticalSection(&dataLock); + csInitialized = false; + } + } + + void ResetDataQueues() { + pendingKeyData.clear(); + hasNewKey = false; + statusQueue.clear(); + } + }; + + HookContext g_ctx; + std::string g_lastError; + const char* kSupportedRange = "4.0.x 及以上 4.x 版本"; + + std::string WideToUtf8(const std::wstring& wide) { + if (wide.empty()) { + return std::string(); + } + int sizeNeeded = WideCharToMultiByte( + CP_UTF8, + 0, + wide.c_str(), + static_cast(wide.size()), + nullptr, + 0, + nullptr, + nullptr + ); + if (sizeNeeded <= 0) { + return std::string(); + } + std::string utf8(sizeNeeded, 0); + WideCharToMultiByte( + CP_UTF8, + 0, + wide.c_str(), + static_cast(wide.size()), + reinterpret_cast(&utf8[0]), + sizeNeeded, + nullptr, + nullptr + ); + return utf8; + } + + // 生成唯一ID + std::string GenerateUniqueId(DWORD pid) { + std::stringstream ss; + ss << std::hex << pid << "_" << GetTickCount64(); + return ss.str(); + } + + // 发送状态信息 + void SendStatus(const std::string& message, int level) { + // 统一由外层日志系统加等级前缀 + const std::string& prefixed = message; + if (g_ctx.csInitialized) { + EnterCriticalSection(&g_ctx.dataLock); + } + g_ctx.statusQueue.push_back({prefixed, level}); + // 限制队列大小 + if (g_ctx.statusQueue.size() > 100) { + g_ctx.statusQueue.erase(g_ctx.statusQueue.begin()); + } + if (g_ctx.csInitialized) { + LeaveCriticalSection(&g_ctx.dataLock); + } + } + + std::string GetSystemErrorMessage(DWORD errorCode) { + if (errorCode == 0) { + return std::string(); + } + + LPWSTR buffer = nullptr; + DWORD length = FormatMessageW( + FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, + nullptr, + errorCode, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + reinterpret_cast(&buffer), + 0, + nullptr + ); + + std::string message; + if (length && buffer) { + std::wstring wideMessage(buffer, length); + while (!wideMessage.empty() && (wideMessage.back() == L'\r' || wideMessage.back() == L'\n')) { + wideMessage.pop_back(); + } + message = WideToUtf8(wideMessage); + } + + if (buffer) { + LocalFree(buffer); + } + return message; + } + + std::string FormatWin32Error(const std::string& baseMessage, DWORD errorCode) { + std::ostringstream oss; + oss << baseMessage; + if (errorCode != 0) { + oss << " (code " << errorCode << ")"; + std::string detail = GetSystemErrorMessage(errorCode); + if (!detail.empty()) { + oss << ": " << detail; + } + } + return oss.str(); + } + + std::string FormatNtStatusError(const std::string& baseMessage, NTSTATUS status) { + std::ostringstream oss; + oss << baseMessage << " (NTSTATUS 0x" + << std::uppercase << std::hex << std::setw(8) << std::setfill('0') + << static_cast(status) << ")"; + return oss.str(); + } + + // 设置错误信息 + void SetLastError(const std::string& error) { + g_lastError = error; + SendStatus(error, 2); // level 2 = error + } + // 数据回调处理(从IPC线程调用) + void OnDataReceived(const SharedKeyData& data) { + // Validate data + if (data.dataSize != 32) { + SendStatus("收到的密钥数据长度不正确", 2); + return; + } + + // 转换为十六进制字符串 + std::stringstream ss; + ss << std::hex << std::setfill('0'); + for (DWORD i = 0; i < data.dataSize; i++) { + ss << std::setw(2) << static_cast(data.keyBuffer[i]); + } + + std::string keyHex = ss.str(); + + // 存入队列 + if (g_ctx.csInitialized) { + EnterCriticalSection(&g_ctx.dataLock); + } + g_ctx.pendingKeyData = keyHex; + g_ctx.hasNewKey = true; + if (g_ctx.csInitialized) { + LeaveCriticalSection(&g_ctx.dataLock); + } + + SendStatus("已成功接收到密钥", 1); // level 1 = success + } +} + +namespace { + bool InitializeContext(DWORD targetPid) { + if (g_ctx.initialized) { + SetLastError("Hook已经初始化"); + return false; + } + + g_ctx.InitLock(); + g_ctx.ResetDataQueues(); + + SendStatus("开始初始化Hook系统...", 0); + + // 1. 初始化系统调用 + SendStatus("正在初始化系统调用...", 0); + if (!IndirectSyscalls::Initialize()) { + DWORD errorCode = GetLastError(); + SetLastError(FormatWin32Error("初始化间接系统调用失败", errorCode)); + g_ctx.FreeLock(); + return false; + } + + // 2. 打开进程 + SendStatus("正在打开目标进程...", 0); + MY_OBJECT_ATTRIBUTES objAttr; + memset(&objAttr, 0, sizeof(MY_OBJECT_ATTRIBUTES)); + objAttr.Length = sizeof(MY_OBJECT_ATTRIBUTES); + + MY_CLIENT_ID clientId; + memset(&clientId, 0, sizeof(MY_CLIENT_ID)); + clientId.UniqueProcess = (PVOID)(ULONG_PTR)targetPid; + + HANDLE hProcess = NULL; + NTSTATUS status = IndirectSyscalls::NtOpenProcess( + &hProcess, + PROCESS_ALL_ACCESS, + &objAttr, + &clientId + ); + g_ctx.hProcess = hProcess; + if (status != STATUS_SUCCESS || !g_ctx.hProcess) { + SetLastError(FormatNtStatusError("打开目标进程失败", status)); + CleanupContext(); + return false; + } + + // 3. 获取微信版本 + SendStatus("正在检测微信版本...", 0); + RemoteScanner scanner(g_ctx.hProcess); + std::string wechatVersion = scanner.GetWeChatVersion(); + if (wechatVersion.empty()) { + SetLastError("获取微信版本失败,目标进程可能已退出"); + CleanupContext(); + return false; + } + + { + std::stringstream versionMsg; + versionMsg << u8"检测到的微信版本: " << wechatVersion; + SendStatus(versionMsg.str(), 0); + } + + // 4. 获取版本配置 + const WeChatVersionConfig* config = VersionConfigManager::GetConfigForVersion(wechatVersion); + if (!config) { + std::string errorMsg = std::string(u8"不支持的微信版本: ") + wechatVersion + ",支持范围: " + kSupportedRange; + SetLastError(errorMsg); + CleanupContext(); + return false; + } + + // 5. 扫描函数 + SendStatus("正在扫描目标函数...", 0); + std::string weixinDll = ObfuscatedStrings::GetWeixinDllName(); + RemoteModuleInfo moduleInfo; + + if (!scanner.GetRemoteModuleInfo(weixinDll, moduleInfo)) { + SetLastError("未找到Weixin.dll模块"); + CleanupContext(); + return false; + } + + std::vector results = scanner.FindAllPatterns( + moduleInfo, + config->pattern.data(), + config->mask.c_str() + ); + + if (results.size() != 1) { + std::stringstream errorMsg; + errorMsg << u8"模式匹配失败,找到 " << results.size() << u8" 个结果"; + SetLastError(errorMsg.str()); + CleanupContext(); + return false; + } + + uintptr_t targetFunctionAddress = results[0] + config->offset; + + { + std::stringstream addrMsg; + addrMsg << u8"目标函数地址: 0x" << std::hex << targetFunctionAddress; + SendStatus(addrMsg.str(), 0); + } + + // 6. 在目标进程中分配数据缓冲区(用于存放密钥) + SendStatus("正在分配远程数据缓冲区...", 0); + if (!g_ctx.remoteData.allocate(g_ctx.hProcess, sizeof(SharedKeyData), PAGE_READWRITE)) { + SetLastError("分配远程数据缓冲区失败"); + CleanupContext(); + return false; + } + + // 6.1 分配伪栈 + SendStatus("正在分配远程伪栈...", 0); + const SIZE_T spoofStackSize = 0x8000; // 32KB 伪栈 + if (!g_ctx.spoofStack.allocate(g_ctx.hProcess, spoofStackSize, PAGE_READWRITE)) { + SetLastError("分配远程伪栈失败"); + CleanupContext(); + return false; + } + uintptr_t spoofStackTop = reinterpret_cast(g_ctx.spoofStack.get()) + spoofStackSize - 0x20; // 留出对齐空间 + + // 7. 初始化IPC + SendStatus("正在初始化IPC通信...", 0); + std::string uniqueId = GenerateUniqueId(targetPid); + g_ctx.ipc = std::make_unique(); + if (!g_ctx.ipc->Initialize(uniqueId)) { + DWORD ipcError = GetLastError(); + SetLastError(FormatWin32Error("初始化IPC通信失败", ipcError)); + CleanupContext(); + return false; + } + + g_ctx.ipc->SetRemoteBuffer(g_ctx.hProcess, g_ctx.remoteData.get()); + g_ctx.ipc->SetDataCallback(OnDataReceived); + if (!g_ctx.ipc->StartListening()) { + DWORD ipcError = GetLastError(); + SetLastError(FormatWin32Error("启动IPC监听失败", ipcError)); + CleanupContext(); + return false; + } + + // 8. 创建hook + SendStatus("正在准备安装Hook...", 0); + g_ctx.hooker = std::make_unique(g_ctx.hProcess); + g_ctx.hooker->EnableHardwareBreakpointMode(false); // !!暂时不用硬件断点+VEH,测试期间先用稳定的Inline Hook!! + + // 9. 配置Shellcode + ShellcodeConfig shellcodeConfig{}; + shellcodeConfig.sharedMemoryAddress = g_ctx.remoteData.get(); // 使用远程分配的地址 + shellcodeConfig.eventHandle = nullptr; // 不再使用事件,改用轮询 + shellcodeConfig.trampolineAddress = 0; // 将由RemoteHooker填充 + shellcodeConfig.enableStackSpoofing = true; // 强制开启堆栈伪造 + shellcodeConfig.spoofStackPointer = spoofStackTop; + + // 10. 安装hook + SendStatus("正在安装远程Hook...", 0); + if (!g_ctx.hooker->InstallHook(targetFunctionAddress, shellcodeConfig)) { + DWORD hookError = GetLastError(); + SetLastError(FormatWin32Error("安装Hook失败", hookError)); + CleanupContext(); + return false; + } + + g_ctx.initialized = true; + SendStatus("Hook安装成功,现在登录微信...", 1); + return true; + } + + void CleanupContext() { + if (g_ctx.hooker) { + g_ctx.hooker->UninstallHook(); + g_ctx.hooker.reset(); + } + + if (g_ctx.ipc) { + g_ctx.ipc->StopListening(); + g_ctx.ipc->Cleanup(); + g_ctx.ipc.reset(); + } + + g_ctx.remoteData.reset(); + g_ctx.spoofStack.reset(); + + if (g_ctx.hProcess) { + CloseHandle(g_ctx.hProcess); + g_ctx.hProcess = nullptr; + } + + IndirectSyscalls::Cleanup(); + + if (g_ctx.csInitialized) { + EnterCriticalSection(&g_ctx.dataLock); + g_ctx.ResetDataQueues(); + LeaveCriticalSection(&g_ctx.dataLock); + g_ctx.FreeLock(); + } + + g_ctx.initialized = false; + } +} + +// 导出函数 +HOOK_API bool InitializeHook(DWORD targetPid) { + return InitializeContext(targetPid); +} + +HOOK_API bool CleanupHook() { + if (!g_ctx.initialized) { + return true; + } + SendStatus("正在清理Hook...", 0); + CleanupContext(); + return true; +} + +HOOK_API bool PollKeyData(char* keyBuffer, int bufferSize) { + if (!g_ctx.initialized || !keyBuffer || bufferSize < 65) { + return false; + } + + if (g_ctx.csInitialized) { + EnterCriticalSection(&g_ctx.dataLock); + } + + if (!g_ctx.hasNewKey) { + if (g_ctx.csInitialized) { + LeaveCriticalSection(&g_ctx.dataLock); + } + return false; + } + + size_t copyLen = (g_ctx.pendingKeyData.length() < static_cast(bufferSize - 1)) ? g_ctx.pendingKeyData.length() : static_cast(bufferSize - 1); + memcpy(keyBuffer, g_ctx.pendingKeyData.c_str(), copyLen); + keyBuffer[copyLen] = '\0'; + g_ctx.hasNewKey = false; + g_ctx.pendingKeyData.clear(); + + if (g_ctx.csInitialized) { + LeaveCriticalSection(&g_ctx.dataLock); + } + + return true; +} + +HOOK_API bool GetStatusMessage(char* statusBuffer, int bufferSize, int* outLevel) { + if (!g_ctx.initialized || !statusBuffer || bufferSize < 256 || !outLevel) { + return false; + } + + if (g_ctx.csInitialized) { + EnterCriticalSection(&g_ctx.dataLock); + } + + if (g_ctx.statusQueue.empty()) { + if (g_ctx.csInitialized) { + LeaveCriticalSection(&g_ctx.dataLock); + } + return false; + } + + StatusMessage msg = g_ctx.statusQueue.front(); + g_ctx.statusQueue.erase(g_ctx.statusQueue.begin()); + + if (g_ctx.csInitialized) { + LeaveCriticalSection(&g_ctx.dataLock); + } + + size_t copyLen = (msg.message.length() < static_cast(bufferSize - 1)) ? msg.message.length() : static_cast(bufferSize - 1); + memcpy(statusBuffer, msg.message.c_str(), copyLen); + statusBuffer[copyLen] = '\0'; + *outLevel = msg.level; + + return true; +} + +HOOK_API const char* GetLastErrorMsg() { + return g_lastError.c_str(); +} diff --git a/wx_key/src/ipc_manager.cpp b/wx_key/src/ipc_manager.cpp new file mode 100644 index 0000000..0e85ccf --- /dev/null +++ b/wx_key/src/ipc_manager.cpp @@ -0,0 +1,272 @@ +#include "../include/ipc_manager.h" +#include "../include/string_obfuscator.h" +#include +#include +#include +#include + +IPCManager::IPCManager() + : hMapFile(nullptr) + , hEvent(nullptr) + , pSharedMemory(nullptr) + , hTargetProcess(nullptr) + , pRemoteBuffer(nullptr) + , lastSequenceNumber(0) + , hListeningThread(nullptr) + , shouldStopListening(false) +{ +} + +IPCManager::~IPCManager() { + Cleanup(); +} + +bool IPCManager::Initialize(const std::string& uniqueId) { + // 在唯一标识后追加随机片段,进一步降低命名的可预测性 + std::random_device rd; + std::mt19937_64 gen(rd()); + std::uniform_int_distribution dist; + unsigned long long randSuffix = dist(gen); + + std::stringstream uniq; + uniq << uniqueId << "_" << std::hex << randSuffix; + this->uniqueId = uniq.str(); + + // 生成唯一的共享内存和事件名称 + std::string baseMemName = ObfuscatedStrings::GetSharedMemoryName(); + std::string baseEventName = ObfuscatedStrings::GetEventName(); + + // 替换{GUID}为实际的uniqueId + size_t pos = baseMemName.find("{GUID}"); + if (pos != std::string::npos) { + baseMemName.replace(pos, 6, uniqueId); + } + + pos = baseEventName.find("{GUID}"); + if (pos != std::string::npos) { + baseEventName.replace(pos, 6, uniqueId); + } + + auto tryCreateResources = [&](const std::string& memName, const std::string& evtName) -> bool { + HANDLE mapHandle = CreateFileMappingA( + INVALID_HANDLE_VALUE, + nullptr, + PAGE_READWRITE, + 0, + sizeof(SharedKeyData), + memName.c_str() + ); + + if (mapHandle == nullptr) { + return false; + } + + PVOID sharedView = MapViewOfFile( + mapHandle, + FILE_MAP_ALL_ACCESS, + 0, + 0, + sizeof(SharedKeyData) + ); + + if (sharedView == nullptr) { + DWORD err = GetLastError(); + CloseHandle(mapHandle); + SetLastError(err); + return false; + } + + ZeroMemory(sharedView, sizeof(SharedKeyData)); + + // 创建事件对象(手动重置) + HANDLE eventHandle = CreateEventA( + nullptr, + TRUE, // 手动重置 + FALSE, // 初始状态非信号 + evtName.c_str() + ); + + if (eventHandle == nullptr) { + DWORD err = GetLastError(); + UnmapViewOfFile(sharedView); + CloseHandle(mapHandle); + SetLastError(err); + return false; + } + + hMapFile = mapHandle; + hEvent = eventHandle; + pSharedMemory = sharedView; + sharedMemoryName = memName; + eventName = evtName; + return true; + }; + + auto convertGlobalToLocal = [](const std::string& name) -> std::string { + const std::string globalPrefix = "Global\\"; + if (name.rfind(globalPrefix, 0) == 0) { + return std::string("Local\\") + name.substr(globalPrefix.length()); + } + return name; + }; + + if (tryCreateResources(baseMemName, baseEventName)) { + return true; + } + + DWORD firstError = GetLastError(); + bool needsFallback = (firstError == ERROR_ACCESS_DENIED || firstError == ERROR_PRIVILEGE_NOT_HELD); + + if (needsFallback) { + std::string localMemName = convertGlobalToLocal(baseMemName); + std::string localEventName = convertGlobalToLocal(baseEventName); + + if ((localMemName != baseMemName || localEventName != baseEventName) && + tryCreateResources(localMemName, localEventName)) { + return true; + } + } + + SetLastError(firstError); + return false; +} + +void IPCManager::SetRemoteBuffer(HANDLE hProcess, PVOID remoteBufferAddr) { + hTargetProcess = hProcess; + pRemoteBuffer = remoteBufferAddr; + lastSequenceNumber = 0; +} + +void IPCManager::Cleanup() { + StopListening(); + + if (pSharedMemory) { + UnmapViewOfFile(pSharedMemory); + pSharedMemory = nullptr; + } + + if (hEvent) { + CloseHandle(hEvent); + hEvent = nullptr; + } + + if (hMapFile) { + CloseHandle(hMapFile); + hMapFile = nullptr; + } +} + +void IPCManager::SetDataCallback(std::function callback) { + dataCallback = callback; +} + +bool IPCManager::StartListening() { + if (hListeningThread != nullptr) { + return true; // 已经在监听 + } + + shouldStopListening.store(false); + if (hEvent) { + ResetEvent(hEvent); + } + hListeningThread = CreateThread( + nullptr, + 0, + ListeningThreadProc, + this, + 0, + nullptr + ); + + return (hListeningThread != nullptr); +} + +void IPCManager::StopListening() { + if (hListeningThread == nullptr) { + return; + } + + shouldStopListening.store(true); + SetEvent(hEvent); // 唤醒等待线程 + + // 等待线程退出 + WaitForSingleObject(hListeningThread, 5000); + CloseHandle(hListeningThread); + hListeningThread = nullptr; +} + +PVOID IPCManager::GetSharedMemoryAddress() const { + return pSharedMemory; +} + +HANDLE IPCManager::GetEventHandle() const { + return hEvent; +} + +DWORD WINAPI IPCManager::ListeningThreadProc(LPVOID lpParam) { + IPCManager* pThis = static_cast(lpParam); + pThis->ListeningLoop(); + return 0; +} + +void IPCManager::ListeningLoop() { + // 轮询模式:周期性读取远程进程中的缓冲区 + while (!shouldStopListening.load()) { + // 添加轻微抖动,避免稳定的轮询间隔特征 + DWORD jitteredWait = 80 + (GetTickCount() & 0x3F); // 80-143ms + DWORD waitResult = WaitForSingleObject(hEvent, jitteredWait); + if (waitResult == WAIT_OBJECT_0) { + if (shouldStopListening.load()) { + break; + } + ResetEvent(hEvent); + } + + if (!hTargetProcess || !pRemoteBuffer) { + continue; + } + + // 从远程进程读取数据 + SharedKeyData keyData; + ZeroMemory(&keyData, sizeof(keyData)); + + SIZE_T bytesRead = 0; + BOOL readResult = ReadProcessMemory( + hTargetProcess, + pRemoteBuffer, + &keyData, + sizeof(SharedKeyData), + &bytesRead + ); + + if (readResult && bytesRead == sizeof(SharedKeyData)) { + // 检查是否有新数据(通过序列号判断) + if (keyData.dataSize > 0 && + keyData.dataSize <= 32 && + keyData.sequenceNumber != lastSequenceNumber && + keyData.sequenceNumber != 0) { + + // 更新序列号 + lastSequenceNumber = keyData.sequenceNumber; + + // 调用回调函数 + if (dataCallback) { + dataCallback(keyData); + } + + // 清空远程缓冲区(防止重复读取) + SharedKeyData zeroData; + ZeroMemory(&zeroData, sizeof(zeroData)); + SIZE_T bytesWritten = 0; + WriteProcessMemory( + hTargetProcess, + pRemoteBuffer, + &zeroData, + sizeof(SharedKeyData), + &bytesWritten + ); + } + } + } +} + diff --git a/wx_key/src/remote_hooker.cpp b/wx_key/src/remote_hooker.cpp new file mode 100644 index 0000000..4fee8a5 --- /dev/null +++ b/wx_key/src/remote_hooker.cpp @@ -0,0 +1,418 @@ +#include "../include/remote_hooker.h" +#include "../include/syscalls.h" +#include "../include/shellcode_builder.h" +#include +#include + +// 简单的x64反汇编长度检测器 +// 支持常见指令,用于计算需要备份多少字节 +namespace X64Disasm { + // 检查是否为REX前缀 (0x40-0x4F) + inline bool IsRexPrefix(BYTE b) { + return (b >= 0x40 && b <= 0x4F); + } + + // 获取单条指令的长度 + size_t GetInstructionLength(const BYTE* code) { + size_t len = 0; + bool hasRex = false; + + // 跳过REX前缀 + if (IsRexPrefix(code[len])) { + hasRex = true; + len++; + } + + BYTE opcode = code[len]; + len++; + + // 常见指令长度判断(简化版) + switch (opcode) { + // 单字节指令 + case 0x50: case 0x51: case 0x52: case 0x53: + case 0x54: case 0x55: case 0x56: case 0x57: + case 0x58: case 0x59: case 0x5A: case 0x5B: + case 0x5C: case 0x5D: case 0x5E: case 0x5F: + case 0x90: case 0xC3: case 0xCC: + return len; + + // MOV指令 + case 0x88: case 0x89: case 0x8A: case 0x8B: + len++; // ModRM + if ((code[len-1] & 0xC0) != 0xC0) { + // 有内存操作数 + BYTE modrm = code[len-1]; + BYTE mod = (modrm >> 6) & 3; + BYTE rm = modrm & 7; + + if (rm == 4) len++; // 有SIB字节 + if (mod == 1) len++; // disp8 + else if (mod == 2) len += 4; // disp32 + } + return len; + + // 立即数指令 + case 0xB0: case 0xB1: case 0xB2: case 0xB3: + case 0xB4: case 0xB5: case 0xB6: case 0xB7: + return len + 1; // imm8 + + case 0xB8: case 0xB9: case 0xBA: case 0xBB: + case 0xBC: case 0xBD: case 0xBE: case 0xBF: + return len + (hasRex ? 8 : 4); // imm32/64 + + // 短跳转 + case 0x70: case 0x71: case 0x72: case 0x73: + case 0x74: case 0x75: case 0x76: case 0x77: + case 0x78: case 0x79: case 0x7A: case 0x7B: + case 0x7C: case 0x7D: case 0x7E: case 0x7F: + case 0xEB: + return len + 1; // rel8 + + case 0xE8: case 0xE9: // CALL/JMP rel32 + return len + 4; + + // 双字节指令 + case 0x0F: + len++; + opcode = code[len-1]; + if (opcode >= 0x80 && opcode <= 0x8F) { + return len + 4; // 条件跳转 rel32 + } + return len + 1; // 简化处理 + + // LEA + case 0x8D: + len++; // ModRM + if ((code[len-1] & 0x07) == 4) len++; // SIB + if (((code[len-1] >> 6) & 3) == 2) len += 4; // disp32 + return len; + + default: + // 未知指令,返回最小长度 + return len + 1; + } + } +} + +RemoteHooker::RemoteHooker(HANDLE hProcess) + : hProcess(hProcess) + , targetAddress(0) + , remoteShellcodeAddress(0) + , trampolineAddress(0) + , isHookInstalled(false) + , useHardwareBreakpoint(false) +{ +} + +RemoteHooker::~RemoteHooker() { + UninstallHook(); +} + +namespace { + void DebugProtectChange(const char* label, void* address, SIZE_T size, DWORD protect) { + char buffer[160]{}; + _snprintf_s(buffer, sizeof(buffer) - 1, _TRUNCATE, "[RemoteHooker] %s addr=%p size=%zu prot=0x%lx\n", + label, address, static_cast(size), static_cast(protect)); + OutputDebugStringA(buffer); + } +} + +PVOID RemoteHooker::RemoteAllocate(SIZE_T size, DWORD protect) { + PVOID baseAddress = nullptr; + SIZE_T regionSize = size; + + NTSTATUS status = IndirectSyscalls::NtAllocateVirtualMemory( + hProcess, + &baseAddress, + 0, + ®ionSize, + MEM_COMMIT | MEM_RESERVE, + protect + ); + + return (status == STATUS_SUCCESS) ? baseAddress : nullptr; +} + +bool RemoteHooker::RemoteWrite(PVOID address, const void* data, SIZE_T size) { + SIZE_T bytesWritten = 0; + + NTSTATUS status = IndirectSyscalls::NtWriteVirtualMemory( + hProcess, + address, + (PVOID)data, + size, + &bytesWritten + ); + + return (status == STATUS_SUCCESS && bytesWritten == size); +} + +bool RemoteHooker::RemoteRead(PVOID address, void* buffer, SIZE_T size) { + SIZE_T bytesRead = 0; + + NTSTATUS status = IndirectSyscalls::NtReadVirtualMemory( + hProcess, + address, + buffer, + size, + &bytesRead + ); + + return (status == STATUS_SUCCESS && bytesRead == size); +} + +bool RemoteHooker::RemoteProtect(PVOID address, SIZE_T size, DWORD newProtect, DWORD* oldProtect) { + ULONG oldProt = 0; + + NTSTATUS status = IndirectSyscalls::NtProtectVirtualMemory( + hProcess, + &address, + &size, + newProtect, + &oldProt + ); + + if (oldProtect) { + *oldProtect = oldProt; + } + + return (status == STATUS_SUCCESS); +} + +size_t RemoteHooker::CalculateHookLength(const BYTE* code) { + size_t totalLen = 0; + const size_t minLen = 14; // 我们需要至少14字节来放置长跳转 + + while (totalLen < minLen) { + size_t instrLen = X64Disasm::GetInstructionLength(code + totalLen); + if (instrLen == 0) { + return 0; // 失败 + } + totalLen += instrLen; + } + + return totalLen; +} + +bool RemoteHooker::CreateTrampoline(uintptr_t targetAddr) { + // 读取目标地址的原始字节 + BYTE originalCode[32]; + if (!RemoteRead((PVOID)targetAddr, originalCode, sizeof(originalCode))) { + return false; + } + + // 计算需要备份的指令长度 + size_t hookLen = CalculateHookLength(originalCode); + if (hookLen == 0 || hookLen > 32) { + return false; + } + + originalBytes.assign(originalCode, originalCode + hookLen); + + // 分配Trampoline内存 + // Trampoline = 原始指令 + 跳转回原函数的JMP指令 + SIZE_T trampolineSize = hookLen + 14; // 原始指令 + 长跳转 + RemoteMemory trampMem; + if (!trampMem.allocate(hProcess, trampolineSize, PAGE_READWRITE)) { + return false; + } + PVOID trampolineAddr = trampMem.get(); + + trampolineAddress = (uintptr_t)trampolineAddr; + + // 写入原始指令 + if (!RemoteWrite(trampolineAddr, originalCode, hookLen)) { + return false; + } + + // 生成跳转回原函数的指令 + uintptr_t returnAddress = targetAddr + hookLen; + std::vector jmpBack = GenerateJumpInstruction(trampolineAddress + hookLen, returnAddress); + + if (!RemoteWrite((PVOID)(trampolineAddress + hookLen), jmpBack.data(), jmpBack.size())) { + trampolineAddress = 0; + return false; + } + + if (!trampMem.protect(PAGE_EXECUTE_READ)) { + trampolineAddress = 0; + return false; + } + DebugProtectChange("trampoline RX", trampolineAddr, trampolineSize, PAGE_EXECUTE_READ); + trampolineMemory = std::move(trampMem); + + return true; +} + +std::vector RemoteHooker::GenerateJumpInstruction(uintptr_t from, uintptr_t to) { + std::vector jmp; + + // 计算相对偏移 + INT64 offset = (INT64)to - (INT64)from - 5; + + // 如果可以使用5字节短跳转(rel32) + if (offset >= INT32_MIN && offset <= INT32_MAX) { + jmp.push_back(0xE9); // JMP rel32 + INT32 offset32 = (INT32)offset; + jmp.push_back((BYTE)(offset32 & 0xFF)); + jmp.push_back((BYTE)((offset32 >> 8) & 0xFF)); + jmp.push_back((BYTE)((offset32 >> 16) & 0xFF)); + jmp.push_back((BYTE)((offset32 >> 24) & 0xFF)); + } + else { + // 使用14字节长跳转 + // mov rax, addr64 + jmp.push_back(0x48); + jmp.push_back(0xB8); + for (int i = 0; i < 8; i++) { + jmp.push_back((BYTE)((to >> (i * 8)) & 0xFF)); + } + // jmp rax + jmp.push_back(0xFF); + jmp.push_back(0xE0); + } + + return jmp; +} + +bool RemoteHooker::InstallHook(uintptr_t targetFunctionAddress, const ShellcodeConfig& shellcodeConfig) { + if (isHookInstalled) { + return false; // 已经安装过Hook + } + + targetAddress = targetFunctionAddress; + + // 1. 创建Trampoline + if (!CreateTrampoline(targetAddress)) { + return false; + } + + // 2. 构建Shellcode(需要更新配置以包含正确的trampoline地址) + ShellcodeConfig updatedConfig = shellcodeConfig; + updatedConfig.trampolineAddress = trampolineAddress; + + ShellcodeBuilder builder; + std::vector shellcode = builder.BuildHookShellcode(updatedConfig); + + // 3. 在远程进程中分配Shellcode内存 + RemoteMemory shellMem; + if (!shellMem.allocate(hProcess, shellcode.size(), PAGE_READWRITE)) { + trampolineMemory.reset(); + trampolineAddress = 0; + return false; + } + PVOID remoteShellcode = shellMem.get(); + remoteShellcodeAddress = (uintptr_t)remoteShellcode; + + // 4. 写入Shellcode + if (!RemoteWrite(remoteShellcode, shellcode.data(), shellcode.size())) { + trampolineMemory.reset(); + trampolineAddress = 0; + remoteShellcodeAddress = 0; + return false; + } + + if (!shellMem.protect(PAGE_EXECUTE_READ)) { + trampolineMemory.reset(); + trampolineAddress = 0; + remoteShellcodeAddress = 0; + return false; + } + DebugProtectChange("shellcode RX", remoteShellcode, shellcode.size(), PAGE_EXECUTE_READ); + shellcodeMemory = std::move(shellMem); + + DWORD shellOldProtect = 0; + if (!RemoteProtect(remoteShellcode, shellcode.size(), PAGE_EXECUTE_READ, &shellOldProtect)) { + shellcodeMemory.reset(); + shellcodeMemory = RemoteMemory(); + trampolineMemory.reset(); + remoteShellcodeAddress = 0; + trampolineAddress = 0; + return false; + } + DebugProtectChange("shellcode RX", remoteShellcode, shellcode.size(), PAGE_EXECUTE_READ); + + // 5. 生成Hook跳转指令 + std::vector hookJump = GenerateJumpInstruction(targetAddress, remoteShellcodeAddress); + + // 确保有足够的空间 + if (hookJump.size() > originalBytes.size()) { + shellcodeMemory.reset(); + shellcodeMemory = RemoteMemory(); + trampolineMemory.reset(); + remoteShellcodeAddress = 0; + trampolineAddress = 0; + return false; + } + + // 填充NOP + while (hookJump.size() < originalBytes.size()) { + hookJump.push_back(0x90); // NOP + } + + if (useHardwareBreakpoint) { + // 硬件断点模式:不写补丁,直接返回,由上层负责设置断点/VEH + isHookInstalled = true; + return true; + } else { + // Inline Patch 模式(回退) + // 6. 修改目标函数的保护属性 + DWORD oldProtect; + if (!RemoteProtect((PVOID)targetAddress, originalBytes.size(), PAGE_EXECUTE_READWRITE, &oldProtect)) { + shellcodeMemory.reset(); + shellcodeMemory = RemoteMemory(); + trampolineMemory.reset(); + remoteShellcodeAddress = 0; + trampolineAddress = 0; + return false; + } + + // 7. 写入Hook跳转指令(原子操作) + bool writeSuccess = RemoteWrite((PVOID)targetAddress, hookJump.data(), hookJump.size()); + + // 8. 恢复原始保护属性 + DWORD tempProtect; + RemoteProtect((PVOID)targetAddress, originalBytes.size(), oldProtect, &tempProtect); + + if (!writeSuccess) { + shellcodeMemory.reset(); + shellcodeMemory = RemoteMemory(); + trampolineMemory.reset(); + remoteShellcodeAddress = 0; + trampolineAddress = 0; + return false; + } + + isHookInstalled = true; + return true; + } +} + +bool RemoteHooker::UninstallHook() { + if (!isHookInstalled) { + return true; + } + + bool restoreSuccess = true; + + if (!useHardwareBreakpoint) { + // Inline patch 模式才需要恢复补丁 + DWORD oldProtect; + if (!RemoteProtect((PVOID)targetAddress, originalBytes.size(), PAGE_EXECUTE_READWRITE, &oldProtect)) { + return false; + } + restoreSuccess = RemoteWrite((PVOID)targetAddress, originalBytes.data(), originalBytes.size()); + DWORD tempProtect; + RemoteProtect((PVOID)targetAddress, originalBytes.size(), oldProtect, &tempProtect); + } + + // 4. 释放远程内存 + shellcodeMemory.reset(); + remoteShellcodeAddress = 0; + trampolineMemory.reset(); + trampolineAddress = 0; + + isHookInstalled = false; + return restoreSuccess; +} + diff --git a/wx_key/src/remote_scanner.cpp b/wx_key/src/remote_scanner.cpp new file mode 100644 index 0000000..87eef10 --- /dev/null +++ b/wx_key/src/remote_scanner.cpp @@ -0,0 +1,245 @@ +#include "../include/remote_scanner.h" +#include "../include/syscalls.h" +#include "../include/string_obfuscator.h" +#include +#include +#include +#include + +#pragma comment(lib, "Psapi.lib") +#pragma comment(lib, "version.lib") + +// 版本配置管理器静态成员 +std::vector VersionConfigManager::configs; +bool VersionConfigManager::initialized = false; + +namespace { + using VersionArray = std::array; + + bool ParseVersionString(const std::string& version, VersionArray& outParts) { + outParts.fill(0); + std::stringstream ss(version); + std::string segment; + size_t index = 0; + + while (std::getline(ss, segment, '.') && index < outParts.size()) { + try { + outParts[index++] = std::stoi(segment); + } catch (const std::exception&) { + return false; + } + } + + return index > 0; + } + + int CompareVersions(const VersionArray& lhs, const VersionArray& rhs) { + for (size_t i = 0; i < lhs.size(); ++i) { + if (lhs[i] < rhs[i]) return -1; + if (lhs[i] > rhs[i]) return 1; + } + return 0; + } +} + +void VersionConfigManager::InitializeConfigs() { + if (initialized) return; + + // 微信 4.1.4 及以上配置(含 4.1.4.x、4.1.5.x 等) + configs.push_back(WeChatVersionConfig( + ">=4.1.4", + {0x24, 0x08, 0x48, 0x89, 0x6c, 0x24, 0x10, 0x48, 0x89, 0x74, 0x00, 0x18, 0x48, 0x89, 0x7c, 0x00, 0x20, 0x41, 0x56, 0x48, 0x83, 0xec, 0x50, 0x41}, + "xxxxxxxxxx?xxxx?xxxxxxxx", + -3 + )); + + // 微信 4.1.4 以下(含 4.1.0-4.1.3 与 4.0.x)的通用配置 + configs.push_back(WeChatVersionConfig( + "<4.1.4", + {0x24, 0x50, 0x48, 0xc7, 0x45, 0x00, 0xfe, 0xff, 0xff, 0xff, 0x44, 0x89, 0xcf, 0x44, 0x89, 0xc3, 0x49, 0x89, 0xd6, 0x48, 0x89, 0xce, 0x48, 0x89}, + "xxxxxxxxxxxxxxxxxxxxxxxx", + -0xf + )); + + initialized = true; +} + +const WeChatVersionConfig* VersionConfigManager::GetConfigForVersion(const std::string& version) { + InitializeConfigs(); + + if (configs.size() < 2 || version.empty()) { + return nullptr; + } + + VersionArray parsedVersion; + if (!ParseVersionString(version, parsedVersion)) { + return nullptr; + } + + constexpr VersionArray baseline414 = {4, 1, 4, 0}; + + if (CompareVersions(parsedVersion, baseline414) >= 0) { + return &configs[0]; + } + + if ((parsedVersion[0] == 4 && parsedVersion[1] == 1 && parsedVersion[2] < 4) || + (parsedVersion[0] == 4 && parsedVersion[1] == 0)) { + return &configs[1]; + } + + return nullptr; +} + +// RemoteScanner实现 +RemoteScanner::RemoteScanner(HANDLE hProcess) + : hProcess(hProcess) +{ + // 预分配扫描缓冲区(2MB) + scanBuffer.reserve(2 * 1024 * 1024); +} + +RemoteScanner::~RemoteScanner() { +} + +bool RemoteScanner::GetRemoteModuleInfo(const std::string& moduleName, RemoteModuleInfo& outInfo) { + // 枚举远程进程的模块 + HMODULE hMods[1024]; + DWORD cbNeeded; + + if (!EnumProcessModules(this->hProcess, hMods, sizeof(hMods), &cbNeeded)) { + return false; + } + + DWORD moduleCount = cbNeeded / sizeof(HMODULE); + + for (DWORD i = 0; i < moduleCount; i++) { + char szModName[MAX_PATH]; + + if (GetModuleBaseNameA(this->hProcess, hMods[i], szModName, sizeof(szModName) / sizeof(char))) { + if (_stricmp(szModName, moduleName.c_str()) == 0) { + MODULEINFO modInfo; + if (GetModuleInformation(this->hProcess, hMods[i], &modInfo, sizeof(modInfo))) { + outInfo.baseAddress = hMods[i]; + outInfo.imageSize = modInfo.SizeOfImage; + outInfo.moduleName = szModName; + return true; + } + } + } + } + + return false; +} + +bool RemoteScanner::MatchPattern(const BYTE* data, const BYTE* pattern, const char* mask, size_t length) { + for (size_t i = 0; i < length; i++) { + if (mask[i] != '?' && data[i] != pattern[i]) { + return false; + } + } + return true; +} + +uintptr_t RemoteScanner::FindPattern(const RemoteModuleInfo& moduleInfo, const BYTE* pattern, const char* mask) { + auto results = FindAllPatterns(moduleInfo, pattern, mask); + return results.empty() ? 0 : results[0]; +} + +std::vector RemoteScanner::FindAllPatterns(const RemoteModuleInfo& moduleInfo, const BYTE* pattern, const char* mask) { + std::vector results; + + size_t patternLength = strlen(mask); + uintptr_t baseAddress = (uintptr_t)moduleInfo.baseAddress; + SIZE_T imageSize = moduleInfo.imageSize; + + // 分块读取和扫描 + const SIZE_T CHUNK_SIZE = 1024 * 1024; // 1MB chunks + scanBuffer.resize(CHUNK_SIZE + patternLength); + + for (SIZE_T offset = 0; offset < imageSize; offset += CHUNK_SIZE) { + SIZE_T readSize = min(CHUNK_SIZE + patternLength, imageSize - offset); + SIZE_T bytesRead = 0; + + // 使用间接系统调用读取内存 + NTSTATUS status = IndirectSyscalls::NtReadVirtualMemory( + this->hProcess, + (PVOID)(baseAddress + offset), + scanBuffer.data(), + readSize, + &bytesRead + ); + + if (status != STATUS_SUCCESS || bytesRead == 0) { + continue; + } + + if (bytesRead < patternLength) { + continue; + } + + // 在本地缓冲区中搜索特征码 + for (SIZE_T i = 0; i + patternLength <= bytesRead; ++i) { + if (MatchPattern(&scanBuffer[i], pattern, mask, patternLength)) { + results.push_back(baseAddress + offset + i); + } + } + } + + return results; +} + +bool RemoteScanner::ReadRemoteMemory(uintptr_t address, void* buffer, SIZE_T size) { + SIZE_T bytesRead = 0; + NTSTATUS status = IndirectSyscalls::NtReadVirtualMemory( + this->hProcess, + (PVOID)address, + buffer, + size, + &bytesRead + ); + + return (status == STATUS_SUCCESS && bytesRead == size); +} + +std::string RemoteScanner::GetWeChatVersion() { + std::string weixinDllName = ObfuscatedStrings::GetWeixinDllName(); + + RemoteModuleInfo moduleInfo; + if (!GetRemoteModuleInfo(weixinDllName, moduleInfo)) { + return ""; + } + + // 读取模块路径 + WCHAR modulePath[MAX_PATH]; + if (GetModuleFileNameExW(this->hProcess, moduleInfo.baseAddress, modulePath, MAX_PATH) == 0) { + return ""; + } + + // 获取文件版本信息 + DWORD handle = 0; + DWORD versionSize = GetFileVersionInfoSizeW(modulePath, &handle); + if (versionSize == 0) { + return ""; + } + + std::vector versionData(versionSize); + if (!GetFileVersionInfoW(modulePath, handle, versionSize, versionData.data())) { + return ""; + } + + VS_FIXEDFILEINFO* fileInfo = nullptr; + UINT fileInfoSize = 0; + if (VerQueryValueW(versionData.data(), L"\\", (LPVOID*)&fileInfo, &fileInfoSize) && fileInfo) { + DWORD major = HIWORD(fileInfo->dwProductVersionMS); + DWORD minor = LOWORD(fileInfo->dwProductVersionMS); + DWORD build = HIWORD(fileInfo->dwProductVersionLS); + DWORD revision = LOWORD(fileInfo->dwProductVersionLS); + + std::stringstream ss; + ss << major << "." << minor << "." << build << "." << revision; + return ss.str(); + } + + return ""; +} + diff --git a/wx_key/src/remote_veh.cpp b/wx_key/src/remote_veh.cpp new file mode 100644 index 0000000..557129c --- /dev/null +++ b/wx_key/src/remote_veh.cpp @@ -0,0 +1,283 @@ +#include "../include/remote_veh.h" +#include "../include/syscalls.h" +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { + constexpr SIZE_T RIP_OFFSET = offsetof(CONTEXT, Rip); + constexpr SIZE_T EFLAGS_OFFSET = offsetof(CONTEXT, EFlags); + + struct RemoteData { + uint64_t target; + uint64_t shellcode; + uint64_t vehHandle; + }; + + void DebugProtectChange(const char* label, void* address, SIZE_T size, DWORD protect) { + char buffer[160]{}; + _snprintf_s(buffer, sizeof(buffer) - 1, _TRUNCATE, "[RemoteVEH] %s addr=%p size=%zu prot=0x%lx\n", + label, address, static_cast(size), static_cast(protect)); + OutputDebugStringA(buffer); + } + + bool SetHardwareBreakpointThread(HANDLE hProcess, DWORD tid, uintptr_t address) { + HANDLE hThread = OpenThread(THREAD_ALL_ACCESS, FALSE, tid); + if (!hThread) return false; + + CONTEXT ctx{}; + ctx.ContextFlags = CONTEXT_DEBUG_REGISTERS; + bool ok = false; + if (GetThreadContext(hThread, &ctx)) { + ctx.Dr0 = address; + ctx.Dr7 |= 0x1; + ok = SetThreadContext(hThread, &ctx) == TRUE; + } + CloseHandle(hThread); + return ok; + } + + bool SetHardwareBreakpointAllThreads(HANDLE hProcess, uintptr_t address) { + HANDLE snap = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0); + if (snap == INVALID_HANDLE_VALUE) return false; + THREADENTRY32 te{}; + te.dwSize = sizeof(te); + DWORD pid = GetProcessId(hProcess); + bool ok = true; + if (Thread32First(snap, &te)) { + do { + if (te.th32OwnerProcessID == pid) { + if (!SetHardwareBreakpointThread(hProcess, te.th32ThreadID, address)) { + ok = false; + } + } + } while (Thread32Next(snap, &te)); + } + CloseHandle(snap); + return ok; + } + + void ClearHardwareBreakpointAllThreads(HANDLE hProcess) { + HANDLE snap = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0); + if (snap == INVALID_HANDLE_VALUE) return; + THREADENTRY32 te{}; + te.dwSize = sizeof(te); + DWORD pid = GetProcessId(hProcess); + if (Thread32First(snap, &te)) { + do { + if (te.th32OwnerProcessID == pid) { + HANDLE hThread = OpenThread(THREAD_ALL_ACCESS, FALSE, te.th32ThreadID); + if (hThread) { + CONTEXT ctx{}; + ctx.ContextFlags = CONTEXT_DEBUG_REGISTERS; + if (GetThreadContext(hThread, &ctx)) { + ctx.Dr0 = 0; + ctx.Dr7 = 0; + SetThreadContext(hThread, &ctx); + } + CloseHandle(hThread); + } + } + } while (Thread32Next(snap, &te)); + } + CloseHandle(snap); + } + + uintptr_t GetRemoteProcAddress(HANDLE hProcess, const char* moduleName, FARPROC localProc) { + if (!moduleName) { + return 0; + } + + HMODULE localMod = GetModuleHandleA(moduleName); + if (!localMod || !localProc) return 0; + uintptr_t localBase = reinterpret_cast(localMod); + uintptr_t localAddr = reinterpret_cast(localProc); + uintptr_t offset = localAddr - localBase; + + HMODULE hMods[512]; + DWORD cbNeeded = 0; + if (!EnumProcessModulesEx(hProcess, hMods, sizeof(hMods), &cbNeeded, LIST_MODULES_ALL)) { + return 0; + } + size_t count = cbNeeded / sizeof(HMODULE); + for (size_t i = 0; i < count; ++i) { + char modName[MAX_PATH]{}; + if (GetModuleBaseNameA(hProcess, hMods[i], modName, MAX_PATH)) { + if (_stricmp(modName, moduleName) == 0) { + uintptr_t remoteBase = reinterpret_cast(hMods[i]); + return remoteBase + offset; + } + } + } + return 0; + } + + bool BuildRemoteCode(HANDLE hProcess, uintptr_t target, uintptr_t shellcode, FARPROC addVeh, FARPROC removeVeh, RemoteVehHandle& outHandle) { + std::vector buf; + buf.reserve(512); + + // data + RemoteData localData{}; + localData.target = target; + localData.shellcode = shellcode; + localData.vehHandle = 0; + buf.insert(buf.end(), reinterpret_cast(&localData), reinterpret_cast(&localData) + sizeof(localData)); + + auto emit = [&](std::initializer_list bytes) { + buf.insert(buf.end(), bytes.begin(), bytes.end()); + }; + + auto emitDisp32 = [&](uint32_t disp) { + emit({ + static_cast(disp & 0xFF), + static_cast((disp >> 8) & 0xFF), + static_cast((disp >> 16) & 0xFF), + static_cast((disp >> 24) & 0xFF) + }); + }; + + // handler + size_t handlerOffset = buf.size(); + emit({0x48,0x8B,0x01}); // mov rax,[rcx] (ExceptionRecord) + emit({0x8B,0x00}); // mov eax,[rax] (ExceptionCode) + emit({0x3D,0x04,0x00,0x00,0x80}); // cmp eax,0x80000004 + emit({0x75,0x2B}); // jne cont + emit({0x48,0x8B,0x51,0x08}); // mov rdx,[rcx+8] (Context) + emit({0x48,0xB8}); size_t tgtPatch = buf.size(); buf.resize(buf.size()+8); // mov rax,target + emit({0x48,0x3B,0x82}); emitDisp32(static_cast(RIP_OFFSET)); // cmp rax,[rdx+RIP] + emit({0x75,0x14}); // jne cont + emit({0x48,0xB8}); size_t shPatch = buf.size(); buf.resize(buf.size()+8); // mov rax,shellcode + emit({0x48,0x89,0x82}); emitDisp32(static_cast(RIP_OFFSET)); // mov [rdx+RIP],rax + emit({0x48,0x8B,0x82}); emitDisp32(static_cast(EFLAGS_OFFSET)); // mov rax,[rdx+EFLAGS] + emit({0x48,0x25,0xFF,0xFE,0xFF,0xFF}); // and rax,~0x100 + emit({0x48,0x89,0x82}); emitDisp32(static_cast(EFLAGS_OFFSET)); // mov [rdx+EFLAGS],rax + emit({0xB8,0xFF,0xFF,0xFF,0xFF}); // mov eax,-1 + emit({0xC3}); // ret + emit({0x33,0xC0}); // xor eax,eax + emit({0xC3}); // ret + + // register stub + size_t regOffset = buf.size(); + emit({0x48,0x83,0xEC,0x28}); // sub rsp,28h + emit({0x48,0x31,0xC9}); // xor rcx,rcx + emit({0x48,0x83,0xC1,0x01}); // inc rcx (FirstHandler) + emit({0x48,0xBA}); size_t hPatch = buf.size(); buf.resize(buf.size()+8); // mov rdx,handler + emit({0x48,0xB8}); size_t addPatch = buf.size(); buf.resize(buf.size()+8); // mov rax,AddVEH + emit({0xFF,0xD0}); // call rax + emit({0x48,0xA3}); size_t storePatch = buf.size(); buf.resize(buf.size()+8); // mov [vehHandle], rax + emit({0x48,0x83,0xC4,0x28}); // add rsp,28h + emit({0xC3}); // ret + + // unregister stub + size_t unregOffset = buf.size(); + emit({0x48,0x83,0xEC,0x28}); // sub rsp,28h + emit({0x48,0xA1}); size_t loadPatch = buf.size(); buf.resize(buf.size()+8); // mov rax,[vehHandle] + emit({0x48,0x85,0xC0}); // test rax,rax + emit({0x74,0x14}); // je skip + emit({0x48,0x89,0xC1}); // mov rcx,rax + emit({0x48,0xB8}); size_t remPatch = buf.size(); buf.resize(buf.size()+8); // mov rax,RemoveVEH + emit({0xFF,0xD0}); // call rax + emit({0x48,0x83,0xC4,0x28}); // add rsp,28h + emit({0xC3}); // ret + + // allocate remote + RemoteMemory remoteBlock; + SIZE_T allocSize = buf.size(); + if (!remoteBlock.allocate(hProcess, allocSize, PAGE_READWRITE)) { + return false; + } + + uintptr_t base = reinterpret_cast(remoteBlock.get()); + uintptr_t handlerAddr = base + handlerOffset; + uintptr_t regAddr = base + regOffset; + uintptr_t unregAddr = base + unregOffset; + uintptr_t vehHandleAddr = base + offsetof(RemoteData, vehHandle); + + // patches + memcpy(buf.data() + tgtPatch, &target, sizeof(uint64_t)); + memcpy(buf.data() + shPatch, &shellcode, sizeof(uint64_t)); + memcpy(buf.data() + hPatch, &handlerAddr, sizeof(uint64_t)); + uint64_t addAddr = reinterpret_cast(addVeh); + memcpy(buf.data() + addPatch, &addAddr, sizeof(uint64_t)); + memcpy(buf.data() + storePatch, &vehHandleAddr, sizeof(uint64_t)); + memcpy(buf.data() + loadPatch, &vehHandleAddr, sizeof(uint64_t)); + uint64_t remAddr = reinterpret_cast(removeVeh); + memcpy(buf.data() + remPatch, &remAddr, sizeof(uint64_t)); + + if (!WriteProcessMemory(hProcess, remoteBlock.get(), buf.data(), buf.size(), nullptr)) { + return false; + } + + // 切换为RX,避免长期RWX + if (!remoteBlock.protect(PAGE_EXECUTE_READ)) { + return false; + } + DebugProtectChange("veh block RX", remoteBlock.get(), allocSize, PAGE_EXECUTE_READ); + + HANDLE hThread = CreateRemoteThread(hProcess, nullptr, 0, (LPTHREAD_START_ROUTINE)regAddr, nullptr, 0, nullptr); + if (!hThread) { + return false; + } + WaitForSingleObject(hThread, 5000); + CloseHandle(hThread); + + outHandle.remoteMemory = std::move(remoteBlock); + outHandle.dataBlock = outHandle.remoteMemory.get(); + outHandle.unregStubAddress = unregAddr; + outHandle.vehHandle = reinterpret_cast(vehHandleAddr); // store address for reference + outHandle.installed = true; + return true; + } +} + +RemoteVehHandle InstallRemoteVeh(const RemoteVehConfig& cfg) { + RemoteVehHandle handle = {}; + if (!cfg.hProcess || cfg.targetAddress == 0 || cfg.shellcodeAddress == 0) { + return handle; + } + + HMODULE hKernel32 = GetModuleHandleA("kernel32.dll"); + if (!hKernel32) { + return handle; + } + FARPROC addVehLocal = GetProcAddress(hKernel32, "AddVectoredExceptionHandler"); + FARPROC removeVehLocal = GetProcAddress(hKernel32, "RemoveVectoredExceptionHandler"); + uintptr_t addVeh = GetRemoteProcAddress(cfg.hProcess, "kernel32.dll", addVehLocal); + uintptr_t removeVeh = GetRemoteProcAddress(cfg.hProcess, "kernel32.dll", removeVehLocal); + if (!addVeh || !removeVeh) { + return handle; + } + + if (!BuildRemoteCode(cfg.hProcess, cfg.targetAddress, cfg.shellcodeAddress, reinterpret_cast(addVeh), reinterpret_cast(removeVeh), handle)) { + return handle; + } + + // handler 注册完成后再设置硬件断点,避免未注册时命中异常 + if (!SetHardwareBreakpointAllThreads(cfg.hProcess, cfg.targetAddress)) { + UninstallRemoteVeh(cfg, handle); + RemoteVehHandle emptyHandle = {}; + return emptyHandle; + } + + return handle; +} + +void UninstallRemoteVeh(const RemoteVehConfig& cfg, RemoteVehHandle& handle) { + ClearHardwareBreakpointAllThreads(cfg.hProcess); + + if (handle.installed && handle.dataBlock) { + HANDLE hThread = CreateRemoteThread(cfg.hProcess, nullptr, 0, (LPTHREAD_START_ROUTINE)handle.unregStubAddress, nullptr, 0, nullptr); + if (hThread) { + WaitForSingleObject(hThread, 5000); + CloseHandle(hThread); + } + handle.remoteMemory.reset(); + handle.dataBlock = nullptr; + } + handle.installed = false; +} diff --git a/wx_key/src/shellcode_builder.cpp b/wx_key/src/shellcode_builder.cpp new file mode 100644 index 0000000..72a40ed --- /dev/null +++ b/wx_key/src/shellcode_builder.cpp @@ -0,0 +1,150 @@ +#include "../include/shellcode_builder.h" +#include "../include/ipc_manager.h" +#include +#include + +namespace { + constexpr size_t kSharedDataSizeOffset = offsetof(SharedKeyData, dataSize); + constexpr size_t kSharedKeyBufferOffset = offsetof(SharedKeyData, keyBuffer); + constexpr size_t kSharedSequenceOffset = offsetof(SharedKeyData, sequenceNumber); +} + +ShellcodeBuilder::ShellcodeBuilder() { + shellcode.reserve(512); +} + +ShellcodeBuilder::~ShellcodeBuilder() { +} + +void ShellcodeBuilder::Clear() { + shellcode.clear(); +} + +size_t ShellcodeBuilder::GetShellcodeSize() const { + return shellcode.size(); +} + +// 使用 Xbyak 生成 Hook Shellcode +std::vector ShellcodeBuilder::BuildHookShellcode(const ShellcodeConfig& config) { + shellcode.clear(); + + // 只支持 x64 + if (sizeof(void*) != 8) { + return shellcode; + } + + const bool enableStackSpoofing = config.enableStackSpoofing && config.spoofStackPointer != 0; + uint64_t spoofStackAligned = 0; + if (enableStackSpoofing) { + spoofStackAligned = static_cast(config.spoofStackPointer) & ~static_cast(0xF); + } + + // 生成机器码 + Xbyak::CodeGenerator code(1024, Xbyak::AutoGrow); + + Xbyak::Label skipCopy; + + auto emitSaveRegs = [&]() { + code.pushfq(); + code.push(code.rax); + code.push(code.rcx); + code.push(code.rdx); + code.push(code.rbx); + code.push(code.rbp); + code.push(code.rsi); + code.push(code.rdi); + code.push(code.r8); + code.push(code.r9); + code.push(code.r10); + code.push(code.r11); + code.push(code.r12); + code.push(code.r13); + code.push(code.r14); + code.push(code.r15); + }; + + auto emitRestoreRegs = [&]() { + code.pop(code.r15); + code.pop(code.r14); + code.pop(code.r13); + code.pop(code.r12); + code.pop(code.r11); + code.pop(code.r10); + code.pop(code.r9); + code.pop(code.r8); + code.pop(code.rdi); + code.pop(code.rsi); + code.pop(code.rbp); + code.pop(code.rbx); + code.pop(code.rdx); + code.pop(code.rcx); + code.pop(code.rax); + code.popfq(); + }; + + if (enableStackSpoofing) { + // 将关键寄存器暂存到真实栈,再切换到对齐后的伪栈 + code.push(code.rax); // 保存原始 rax + code.push(code.r10); // 保存原始 r10 + code.push(code.r11); // 保存原始 r11 + + // rsp + 24 对应切换前的真实栈指针 + code.lea(code.rax, code.ptr[code.rsp + 24]); // rax = original rsp + + // 切换到伪栈(对齐到16字节),预留一定空间 + code.mov(code.rsp, spoofStackAligned); + code.sub(code.rsp, 0x20); + + // 将真实 RSP 存到伪栈,并构造一个假的返回地址槽位 + code.push(code.rax); // [rsp] = original rsp + code.push(0); // 伪造返回地址,不破坏通用寄存器 + + // 恢复 r11/r10/rax 的原始值,确保后续保存寄存器时是原值 + code.mov(code.r11, code.qword[code.rax - 24]); + code.mov(code.r10, code.qword[code.rax - 16]); + code.mov(code.rax, code.qword[code.rax - 8]); + } + + // ===== 保存寄存器/标志位 ===== + emitSaveRegs(); + + // ===== keySize 检查 ===== + code.mov(code.rax, code.ptr[code.rdx + 0x10]); // rax = keySize + code.cmp(code.rax, 32); + code.jne(skipCopy); + + // ===== 拷贝 32 字节密钥到共享内存 ===== + code.mov(code.rcx, code.ptr[code.rdx + 0x08]); // rcx = pKeyBuffer + code.mov(code.rdx, (uint64_t)config.sharedMemoryAddress); + code.mov(code.rdi, code.rdx); + code.mov(code.dword[code.rdi + static_cast(kSharedDataSizeOffset)], 32); // dataSize = 32 + code.add(code.rdi, static_cast(kSharedKeyBufferOffset)); // rdi -> keyBuffer + code.mov(code.rsi, code.rcx); // rsi = source + code.mov(code.rcx, 32); // count + code.rep(); + code.movsb(); // rep movsb + + // ===== 递增序列号 ===== + code.mov(code.eax, code.dword[code.rdx + static_cast(kSharedSequenceOffset)]); // 读取 sequenceNumber + code.inc(code.eax); + code.mov(code.dword[code.rdx + static_cast(kSharedSequenceOffset)], code.eax); // 写回递增后的序列号 + + code.L(skipCopy); + + // ===== 恢复寄存器/标志位 ===== + emitRestoreRegs(); + + if (enableStackSpoofing) { + // 丢弃伪造返回地址并恢复真实 RSP,切回原始栈 + code.add(code.rsp, 8); // skip fake return slot + code.pop(code.rsp); + } + + // ===== 跳回 Trampoline ===== + code.mov(code.rax, (uint64_t)config.trampolineAddress); + code.jmp(code.rax); + + // 输出机器码 + shellcode.assign(code.getCode(), code.getCode() + code.getSize()); + return shellcode; +} diff --git a/wx_key/src/syscalls.cpp b/wx_key/src/syscalls.cpp new file mode 100644 index 0000000..7ad1b94 --- /dev/null +++ b/wx_key/src/syscalls.cpp @@ -0,0 +1,277 @@ +#include "../include/syscalls.h" +#include "../include/string_obfuscator.h" +#include +#include +#include +#include + +// 静态成员初始化 +bool IndirectSyscalls::initialized = false; +pNtOpenProcess IndirectSyscalls::fnNtOpenProcess = nullptr; +pNtReadVirtualMemory IndirectSyscalls::fnNtReadVirtualMemory = nullptr; +pNtWriteVirtualMemory IndirectSyscalls::fnNtWriteVirtualMemory = nullptr; +pNtAllocateVirtualMemory IndirectSyscalls::fnNtAllocateVirtualMemory = nullptr; +pNtFreeVirtualMemory IndirectSyscalls::fnNtFreeVirtualMemory = nullptr; +pNtProtectVirtualMemory IndirectSyscalls::fnNtProtectVirtualMemory = nullptr; +pNtQueryInformationProcess IndirectSyscalls::fnNtQueryInformationProcess = nullptr; +// 直调stub +pNtOpenProcess IndirectSyscalls::scNtOpenProcess = nullptr; +pNtReadVirtualMemory IndirectSyscalls::scNtReadVirtualMemory = nullptr; +pNtWriteVirtualMemory IndirectSyscalls::scNtWriteVirtualMemory = nullptr; +pNtAllocateVirtualMemory IndirectSyscalls::scNtAllocateVirtualMemory = nullptr; +pNtFreeVirtualMemory IndirectSyscalls::scNtFreeVirtualMemory = nullptr; +pNtProtectVirtualMemory IndirectSyscalls::scNtProtectVirtualMemory = nullptr; +pNtQueryInformationProcess IndirectSyscalls::scNtQueryInformationProcess = nullptr; + +template +bool IndirectSyscalls::ResolveFunction(const char* functionName, T& functionPointer) { + std::string ntdllName = ObfuscatedStrings::GetNtdllName(); + HMODULE hNtdll = GetModuleHandleA(ntdllName.c_str()); + if (!hNtdll) { + return false; + } + + functionPointer = reinterpret_cast(GetProcAddress(hNtdll, functionName)); + return (functionPointer != nullptr); +} + +namespace { + // 标准 stub 前缀:mov r10, rcx; mov eax, imm32; syscall; ret + bool LooksLikePatchedStub(void* fn) { + if (!fn) return true; + const uint8_t* code = reinterpret_cast(fn); + // 4C 8B D1 B8 xx xx xx xx 0F 05 C3 + constexpr std::array kPrefix = {0x4C, 0x8B, 0xD1}; + for (size_t i = 0; i < kPrefix.size(); ++i) { + if (code[i] != kPrefix[i]) { + return true; + } + } + return false; + } + + FARPROC LoadCleanNtdllFunction(const char* functionName) { + wchar_t sysDir[MAX_PATH]{}; + if (GetSystemDirectoryW(sysDir, MAX_PATH) == 0) { + return nullptr; + } + + wchar_t ntdllPath[MAX_PATH]{}; + if (FAILED(StringCchPrintfW(ntdllPath, MAX_PATH, L"%s\\ntdll.dll", sysDir))) { + return nullptr; + } + + HMODULE hNtdll = LoadLibraryExW( + ntdllPath, + nullptr, + LOAD_LIBRARY_AS_DATAFILE_EXCLUSIVE | LOAD_LIBRARY_AS_IMAGE_RESOURCE | LOAD_LIBRARY_SEARCH_SYSTEM32 + ); + if (!hNtdll) { + return nullptr; + } + + FARPROC fn = GetProcAddress(hNtdll, functionName); + FreeLibrary(hNtdll); + return fn; + } + + void* ChooseSyscallSource(const char* functionName) { + void* fn = reinterpret_cast(GetProcAddress(GetModuleHandleA(ObfuscatedStrings::GetNtdllName().c_str()), functionName)); + if (fn && !LooksLikePatchedStub(fn)) { + return fn; + } + + FARPROC clean = LoadCleanNtdllFunction(functionName); + if (clean) { + return reinterpret_cast(clean); + } + return fn; + } +} // namespace + +bool IndirectSyscalls::Initialize() { + if (initialized) { + return true; + } + + bool success = true; + success &= ResolveFunction("NtOpenProcess", fnNtOpenProcess); + success &= ResolveFunction("NtReadVirtualMemory", fnNtReadVirtualMemory); + success &= ResolveFunction("NtWriteVirtualMemory", fnNtWriteVirtualMemory); + success &= ResolveFunction("NtAllocateVirtualMemory", fnNtAllocateVirtualMemory); + success &= ResolveFunction("NtFreeVirtualMemory", fnNtFreeVirtualMemory); + success &= ResolveFunction("NtProtectVirtualMemory", fnNtProtectVirtualMemory); + success &= ResolveFunction("NtQueryInformationProcess", fnNtQueryInformationProcess); + + // 构建直调 stub,必要时使用干净的 ntdll 提取 SSN + scNtOpenProcess = reinterpret_cast(CreateSyscallStub(ExtractSyscallNumber(ChooseSyscallSource("NtOpenProcess")))); + scNtReadVirtualMemory = reinterpret_cast(CreateSyscallStub(ExtractSyscallNumber(ChooseSyscallSource("NtReadVirtualMemory")))); + scNtWriteVirtualMemory = reinterpret_cast(CreateSyscallStub(ExtractSyscallNumber(ChooseSyscallSource("NtWriteVirtualMemory")))); + scNtAllocateVirtualMemory = reinterpret_cast(CreateSyscallStub(ExtractSyscallNumber(ChooseSyscallSource("NtAllocateVirtualMemory")))); + scNtFreeVirtualMemory = reinterpret_cast(CreateSyscallStub(ExtractSyscallNumber(ChooseSyscallSource("NtFreeVirtualMemory")))); + scNtProtectVirtualMemory = reinterpret_cast(CreateSyscallStub(ExtractSyscallNumber(ChooseSyscallSource("NtProtectVirtualMemory")))); + scNtQueryInformationProcess = reinterpret_cast(CreateSyscallStub(ExtractSyscallNumber(ChooseSyscallSource("NtQueryInformationProcess")))); + + initialized = success; + return success; +} + +void IndirectSyscalls::Cleanup() { + // 清理资源 + initialized = false; +} + +NTSTATUS IndirectSyscalls::NtOpenProcess( + PHANDLE ProcessHandle, + ACCESS_MASK DesiredAccess, + PMY_OBJECT_ATTRIBUTES ObjectAttributes, + PMY_CLIENT_ID ClientId +) { + if (!initialized || !fnNtOpenProcess) { + return STATUS_UNSUCCESSFUL; + } + if (scNtOpenProcess) { + return scNtOpenProcess(ProcessHandle, DesiredAccess, ObjectAttributes, ClientId); + } + return fnNtOpenProcess(ProcessHandle, DesiredAccess, ObjectAttributes, ClientId); +} + +NTSTATUS IndirectSyscalls::NtReadVirtualMemory( + HANDLE ProcessHandle, + PVOID BaseAddress, + PVOID Buffer, + SIZE_T BufferSize, + PSIZE_T NumberOfBytesRead +) { + if (!initialized || !fnNtReadVirtualMemory) { + return STATUS_UNSUCCESSFUL; + } + if (scNtReadVirtualMemory) { + return scNtReadVirtualMemory(ProcessHandle, BaseAddress, Buffer, BufferSize, NumberOfBytesRead); + } + return fnNtReadVirtualMemory(ProcessHandle, BaseAddress, Buffer, BufferSize, NumberOfBytesRead); +} + +NTSTATUS IndirectSyscalls::NtWriteVirtualMemory( + HANDLE ProcessHandle, + PVOID BaseAddress, + PVOID Buffer, + SIZE_T BufferSize, + PSIZE_T NumberOfBytesWritten +) { + if (!initialized || !fnNtWriteVirtualMemory) { + return STATUS_UNSUCCESSFUL; + } + if (scNtWriteVirtualMemory) { + return scNtWriteVirtualMemory(ProcessHandle, BaseAddress, Buffer, BufferSize, NumberOfBytesWritten); + } + return fnNtWriteVirtualMemory(ProcessHandle, BaseAddress, Buffer, BufferSize, NumberOfBytesWritten); +} + +NTSTATUS IndirectSyscalls::NtAllocateVirtualMemory( + HANDLE ProcessHandle, + PVOID* BaseAddress, + ULONG_PTR ZeroBits, + PSIZE_T RegionSize, + ULONG AllocationType, + ULONG Protect +) { + if (!initialized || !fnNtAllocateVirtualMemory) { + return STATUS_UNSUCCESSFUL; + } + if (scNtAllocateVirtualMemory) { + return scNtAllocateVirtualMemory(ProcessHandle, BaseAddress, ZeroBits, RegionSize, AllocationType, Protect); + } + return fnNtAllocateVirtualMemory(ProcessHandle, BaseAddress, ZeroBits, RegionSize, AllocationType, Protect); +} + +NTSTATUS IndirectSyscalls::NtFreeVirtualMemory( + HANDLE ProcessHandle, + PVOID* BaseAddress, + PSIZE_T RegionSize, + ULONG FreeType +) { + if (!initialized || !fnNtFreeVirtualMemory) { + return STATUS_UNSUCCESSFUL; + } + if (scNtFreeVirtualMemory) { + return scNtFreeVirtualMemory(ProcessHandle, BaseAddress, RegionSize, FreeType); + } + return fnNtFreeVirtualMemory(ProcessHandle, BaseAddress, RegionSize, FreeType); +} + +NTSTATUS IndirectSyscalls::NtProtectVirtualMemory( + HANDLE ProcessHandle, + PVOID* BaseAddress, + PSIZE_T RegionSize, + ULONG NewProtect, + PULONG OldProtect +) { + if (!initialized || !fnNtProtectVirtualMemory) { + return STATUS_UNSUCCESSFUL; + } + if (scNtProtectVirtualMemory) { + return scNtProtectVirtualMemory(ProcessHandle, BaseAddress, RegionSize, NewProtect, OldProtect); + } + return fnNtProtectVirtualMemory(ProcessHandle, BaseAddress, RegionSize, NewProtect, OldProtect); +} + +NTSTATUS IndirectSyscalls::NtQueryInformationProcess( + HANDLE ProcessHandle, + PROCESSINFOCLASS ProcessInformationClass, + PVOID ProcessInformation, + ULONG ProcessInformationLength, + PULONG ReturnLength +) { + if (!initialized || !fnNtQueryInformationProcess) { + return STATUS_UNSUCCESSFUL; + } + if (scNtQueryInformationProcess) { + return scNtQueryInformationProcess(ProcessHandle, ProcessInformationClass, ProcessInformation, ProcessInformationLength, ReturnLength); + } + return fnNtQueryInformationProcess(ProcessHandle, ProcessInformationClass, ProcessInformation, ProcessInformationLength, ReturnLength); +} + +uint32_t IndirectSyscalls::ExtractSyscallNumber(void* fnAddress) { + if (!fnAddress) { + return UINT32_MAX; + } + const uint8_t* code = reinterpret_cast(fnAddress); + // 扫描前24字节,寻找 mov eax, imm32 + for (size_t i = 0; i < 24; ++i) { + if (code[i] == 0xB8 && i + 4 < 24) { // mov eax, imm32 + uint32_t ssn = *reinterpret_cast(code + i + 1); + return ssn; + } + } + return UINT32_MAX; +} + +void* IndirectSyscalls::CreateSyscallStub(uint32_t ssn) { + if (ssn == UINT32_MAX) { + return nullptr; + } + + // mov r10, rcx; mov eax, ssn; syscall; ret + uint8_t stubTemplate[] = { + 0x4C, 0x8B, 0xD1, // mov r10, rcx + 0xB8, 0x00, 0x00, 0x00, 0x00, // mov eax, ssn + 0x0F, 0x05, // syscall + 0xC3 // ret + }; + + stubTemplate[4] = (uint8_t)(ssn & 0xFF); + stubTemplate[5] = (uint8_t)((ssn >> 8) & 0xFF); + stubTemplate[6] = (uint8_t)((ssn >> 16) & 0xFF); + stubTemplate[7] = (uint8_t)((ssn >> 24) & 0xFF); + + void* mem = VirtualAlloc(nullptr, sizeof(stubTemplate), MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + if (!mem) { + return nullptr; + } + memcpy(mem, stubTemplate, sizeof(stubTemplate)); + DWORD oldProt = 0; + VirtualProtect(mem, sizeof(stubTemplate), PAGE_EXECUTE_READ, &oldProt); + return mem; +} + diff --git a/wx_key/src/veh_hook_manager.cpp b/wx_key/src/veh_hook_manager.cpp new file mode 100644 index 0000000..c643900 --- /dev/null +++ b/wx_key/src/veh_hook_manager.cpp @@ -0,0 +1,156 @@ +#include "../include/veh_hook_manager.h" +#include +#include + +namespace { + VehHookManager* g_instance = nullptr; +} + +VehHookManager::VehHookManager() + : targetAddress(0) + , vehHandle(nullptr) + , installed(false) { +} + +VehHookManager::~VehHookManager() { + Uninstall(); +} + +bool VehHookManager::SetHardwareBreakpoint(uintptr_t address) { + HANDLE snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0); + if (snapshot == INVALID_HANDLE_VALUE) { + return false; + } + + THREADENTRY32 te{}; + te.dwSize = sizeof(te); + std::vector> threads; + + if (Thread32First(snapshot, &te)) { + DWORD currentPid = GetCurrentProcessId(); + do { + if (te.th32OwnerProcessID == currentPid) { + HANDLE hThread = OpenThread(THREAD_ALL_ACCESS, FALSE, te.th32ThreadID); + if (hThread) { + threads.emplace_back(te.th32ThreadID, hThread); + } + } + } while (Thread32Next(snapshot, &te)); + } + CloseHandle(snapshot); + + bool ok = true; + for (const auto& entry : threads) { + DWORD threadId = entry.first; + HANDLE hThread = entry.second; + CONTEXT ctx; + ZeroMemory(&ctx, sizeof(ctx)); + ctx.ContextFlags = CONTEXT_DEBUG_REGISTERS; + + if (!GetThreadContext(hThread, &ctx)) { + ok = false; + CloseHandle(hThread); + continue; + } + + originalContexts[threadId] = ctx; // 保存每个线程的原始状态 + ctx.Dr0 = address; + ctx.Dr7 |= 0x1; // 启用 DR0 全局断点 + + if (!SetThreadContext(hThread, &ctx)) { + ok = false; + } + CloseHandle(hThread); + } + return ok; +} + +void VehHookManager::ClearHardwareBreakpoint() { + HANDLE snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0); + if (snapshot == INVALID_HANDLE_VALUE) { + return; + } + + THREADENTRY32 te{}; + te.dwSize = sizeof(te); + if (Thread32First(snapshot, &te)) { + DWORD currentPid = GetCurrentProcessId(); + do { + if (te.th32OwnerProcessID == currentPid) { + HANDLE hThread = OpenThread(THREAD_ALL_ACCESS, FALSE, te.th32ThreadID); + if (hThread) { + CONTEXT ctx{}; + auto it = originalContexts.find(te.th32ThreadID); + if (it != originalContexts.end()) { + ctx = it->second; + } else { + ZeroMemory(&ctx, sizeof(ctx)); + ctx.ContextFlags = CONTEXT_DEBUG_REGISTERS; + } + ctx.Dr0 = 0; + ctx.Dr7 = 0; + SetThreadContext(hThread, &ctx); + CloseHandle(hThread); + } + } + } while (Thread32Next(snapshot, &te)); + } + CloseHandle(snapshot); +} + +bool VehHookManager::Install(uintptr_t address, std::function callback) { + if (installed) { + return false; + } + + targetAddress = address; + userCallback = std::move(callback); + + if (!SetHardwareBreakpoint(targetAddress)) { + return false; + } + + vehHandle = AddVectoredExceptionHandler(1, VectoredHandler); + if (!vehHandle) { + ClearHardwareBreakpoint(); + return false; + } + + g_instance = this; + installed = true; + return true; +} + +void VehHookManager::Uninstall() { + if (!installed) { + return; + } + + ClearHardwareBreakpoint(); + + if (vehHandle) { + RemoveVectoredExceptionHandler(vehHandle); + vehHandle = nullptr; + } + + originalContexts.clear(); + installed = false; + g_instance = nullptr; +} + +LONG CALLBACK VehHookManager::VectoredHandler(EXCEPTION_POINTERS* info) { + if (!g_instance || info->ExceptionRecord->ExceptionCode != EXCEPTION_SINGLE_STEP) { + return EXCEPTION_CONTINUE_SEARCH; + } + + if (info->ContextRecord->Rip == g_instance->targetAddress) { + if (g_instance->userCallback) { + g_instance->userCallback(info); + } + // 清除单步标志,继续执行 + info->ContextRecord->EFlags &= ~0x100; + return EXCEPTION_CONTINUE_EXECUTION; + } + + return EXCEPTION_CONTINUE_SEARCH; +} diff --git a/wx_key/vendor/MinHook_134_lib/include/MinHook.h b/wx_key/vendor/MinHook_134_lib/include/MinHook.h new file mode 100644 index 0000000..492d83f --- /dev/null +++ b/wx_key/vendor/MinHook_134_lib/include/MinHook.h @@ -0,0 +1,185 @@ +/* + * MinHook - The Minimalistic API Hooking Library for x64/x86 + * Copyright (C) 2009-2017 Tsuda Kageyu. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if !(defined _M_IX86) && !(defined _M_X64) && !(defined __i386__) && !(defined __x86_64__) + #error MinHook supports only x86 and x64 systems. +#endif + +#include + +// MinHook Error Codes. +typedef enum MH_STATUS +{ + // Unknown error. Should not be returned. + MH_UNKNOWN = -1, + + // Successful. + MH_OK = 0, + + // MinHook is already initialized. + MH_ERROR_ALREADY_INITIALIZED, + + // MinHook is not initialized yet, or already uninitialized. + MH_ERROR_NOT_INITIALIZED, + + // The hook for the specified target function is already created. + MH_ERROR_ALREADY_CREATED, + + // The hook for the specified target function is not created yet. + MH_ERROR_NOT_CREATED, + + // The hook for the specified target function is already enabled. + MH_ERROR_ENABLED, + + // The hook for the specified target function is not enabled yet, or already + // disabled. + MH_ERROR_DISABLED, + + // The specified pointer is invalid. It points the address of non-allocated + // and/or non-executable region. + MH_ERROR_NOT_EXECUTABLE, + + // The specified target function cannot be hooked. + MH_ERROR_UNSUPPORTED_FUNCTION, + + // Failed to allocate memory. + MH_ERROR_MEMORY_ALLOC, + + // Failed to change the memory protection. + MH_ERROR_MEMORY_PROTECT, + + // The specified module is not loaded. + MH_ERROR_MODULE_NOT_FOUND, + + // The specified function is not found. + MH_ERROR_FUNCTION_NOT_FOUND +} +MH_STATUS; + +// Can be passed as a parameter to MH_EnableHook, MH_DisableHook, +// MH_QueueEnableHook or MH_QueueDisableHook. +#define MH_ALL_HOOKS NULL + +#ifdef __cplusplus +extern "C" { +#endif + + // Initialize the MinHook library. You must call this function EXACTLY ONCE + // at the beginning of your program. + MH_STATUS WINAPI MH_Initialize(VOID); + + // Uninitialize the MinHook library. You must call this function EXACTLY + // ONCE at the end of your program. + MH_STATUS WINAPI MH_Uninitialize(VOID); + + // Creates a hook for the specified target function, in disabled state. + // Parameters: + // pTarget [in] A pointer to the target function, which will be + // overridden by the detour function. + // pDetour [in] A pointer to the detour function, which will override + // the target function. + // ppOriginal [out] A pointer to the trampoline function, which will be + // used to call the original target function. + // This parameter can be NULL. + MH_STATUS WINAPI MH_CreateHook(LPVOID pTarget, LPVOID pDetour, LPVOID *ppOriginal); + + // Creates a hook for the specified API function, in disabled state. + // Parameters: + // pszModule [in] A pointer to the loaded module name which contains the + // target function. + // pszProcName [in] A pointer to the target function name, which will be + // overridden by the detour function. + // pDetour [in] A pointer to the detour function, which will override + // the target function. + // ppOriginal [out] A pointer to the trampoline function, which will be + // used to call the original target function. + // This parameter can be NULL. + MH_STATUS WINAPI MH_CreateHookApi( + LPCWSTR pszModule, LPCSTR pszProcName, LPVOID pDetour, LPVOID *ppOriginal); + + // Creates a hook for the specified API function, in disabled state. + // Parameters: + // pszModule [in] A pointer to the loaded module name which contains the + // target function. + // pszProcName [in] A pointer to the target function name, which will be + // overridden by the detour function. + // pDetour [in] A pointer to the detour function, which will override + // the target function. + // ppOriginal [out] A pointer to the trampoline function, which will be + // used to call the original target function. + // This parameter can be NULL. + // ppTarget [out] A pointer to the target function, which will be used + // with other functions. + // This parameter can be NULL. + MH_STATUS WINAPI MH_CreateHookApiEx( + LPCWSTR pszModule, LPCSTR pszProcName, LPVOID pDetour, LPVOID *ppOriginal, LPVOID *ppTarget); + + // Removes an already created hook. + // Parameters: + // pTarget [in] A pointer to the target function. + MH_STATUS WINAPI MH_RemoveHook(LPVOID pTarget); + + // Enables an already created hook. + // Parameters: + // pTarget [in] A pointer to the target function. + // If this parameter is MH_ALL_HOOKS, all created hooks are + // enabled in one go. + MH_STATUS WINAPI MH_EnableHook(LPVOID pTarget); + + // Disables an already created hook. + // Parameters: + // pTarget [in] A pointer to the target function. + // If this parameter is MH_ALL_HOOKS, all created hooks are + // disabled in one go. + MH_STATUS WINAPI MH_DisableHook(LPVOID pTarget); + + // Queues to enable an already created hook. + // Parameters: + // pTarget [in] A pointer to the target function. + // If this parameter is MH_ALL_HOOKS, all created hooks are + // queued to be enabled. + MH_STATUS WINAPI MH_QueueEnableHook(LPVOID pTarget); + + // Queues to disable an already created hook. + // Parameters: + // pTarget [in] A pointer to the target function. + // If this parameter is MH_ALL_HOOKS, all created hooks are + // queued to be disabled. + MH_STATUS WINAPI MH_QueueDisableHook(LPVOID pTarget); + + // Applies all queued changes in one go. + MH_STATUS WINAPI MH_ApplyQueued(VOID); + + // Translates the MH_STATUS to its name as a string. + const char * WINAPI MH_StatusToString(MH_STATUS status); + +#ifdef __cplusplus +} +#endif diff --git a/wx_key/vendor/MinHook_134_lib/lib/libMinHook.x64.lib b/wx_key/vendor/MinHook_134_lib/lib/libMinHook.x64.lib new file mode 100644 index 0000000..e211ad6 Binary files /dev/null and b/wx_key/vendor/MinHook_134_lib/lib/libMinHook.x64.lib differ diff --git a/wx_key/vendor/MinHook_134_lib/lib/libMinHook.x86.lib b/wx_key/vendor/MinHook_134_lib/lib/libMinHook.x86.lib new file mode 100644 index 0000000..b15b79b Binary files /dev/null and b/wx_key/vendor/MinHook_134_lib/lib/libMinHook.x86.lib differ diff --git a/wx_key/vendor/xbyak/xbyak/xbyak.h b/wx_key/vendor/xbyak/xbyak/xbyak.h new file mode 100644 index 0000000..81f08d2 --- /dev/null +++ b/wx_key/vendor/xbyak/xbyak/xbyak.h @@ -0,0 +1,3470 @@ +#pragma once +#ifndef XBYAK_XBYAK_H_ +#define XBYAK_XBYAK_H_ +/*! + @file xbyak.h + @brief Xbyak ; JIT assembler for x86(IA32)/x64 by C++ + @author herumi + @url https://github.com/herumi/xbyak + @note modified new BSD license + http://opensource.org/licenses/BSD-3-Clause +*/ +#if (not +0) && !defined(XBYAK_NO_OP_NAMES) // trick to detect whether 'not' is operator or not + #define XBYAK_NO_OP_NAMES +#endif + +#include // for debug print +#include +#include +#include +#include +#ifndef NDEBUG +#include +#endif + +// #define XBYAK_DISABLE_AVX512 + +#if !defined(XBYAK_USE_MMAP_ALLOCATOR) && !defined(XBYAK_DONT_USE_MMAP_ALLOCATOR) + #define XBYAK_USE_MMAP_ALLOCATOR +#endif +#if !defined(__GNUC__) || defined(__MINGW32__) + #undef XBYAK_USE_MMAP_ALLOCATOR +#endif + +#ifdef __GNUC__ + #define XBYAK_GNUC_PREREQ(major, minor) ((__GNUC__) * 100 + (__GNUC_MINOR__) >= (major) * 100 + (minor)) +#else + #define XBYAK_GNUC_PREREQ(major, minor) 0 +#endif + +// This covers -std=(gnu|c)++(0x|11|1y), -stdlib=libc++, and modern Microsoft. +#if ((defined(_MSC_VER) && (_MSC_VER >= 1600)) || defined(_LIBCPP_VERSION) ||\ + ((__cplusplus >= 201103) || defined(__GXX_EXPERIMENTAL_CXX0X__))) + #include + #define XBYAK_STD_UNORDERED_SET std::unordered_set + #include + #define XBYAK_STD_UNORDERED_MAP std::unordered_map + #define XBYAK_STD_UNORDERED_MULTIMAP std::unordered_multimap + +/* + Clang/llvm-gcc and ICC-EDG in 'GCC-mode' always claim to be GCC 4.2, using + libstdcxx 20070719 (from GCC 4.2.1, the last GPL 2 version). +*/ +#elif XBYAK_GNUC_PREREQ(4, 5) || (XBYAK_GNUC_PREREQ(4, 2) && __GLIBCXX__ >= 20070719) || defined(__INTEL_COMPILER) || defined(__llvm__) + #include + #define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set + #include + #define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map + #define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap + +#elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (_MSC_VER < 1600) + #include + #define XBYAK_STD_UNORDERED_SET std::tr1::unordered_set + #include + #define XBYAK_STD_UNORDERED_MAP std::tr1::unordered_map + #define XBYAK_STD_UNORDERED_MULTIMAP std::tr1::unordered_multimap + +#else + #include + #define XBYAK_STD_UNORDERED_SET std::set + #include + #define XBYAK_STD_UNORDERED_MAP std::map + #define XBYAK_STD_UNORDERED_MULTIMAP std::multimap +#endif +#ifdef _WIN32 + #ifndef WIN32_LEAN_AND_MEAN + #define WIN32_LEAN_AND_MEAN + #endif + #include + #include + #ifdef _MSC_VER + #define XBYAK_TLS __declspec(thread) + #else + #define XBYAK_TLS __thread + #endif +#elif defined(__GNUC__) + #include + #include + #include + #define XBYAK_TLS __thread +#endif +#if defined(__APPLE__) && !defined(XBYAK_DONT_USE_MAP_JIT) + #define XBYAK_USE_MAP_JIT + #include + #ifndef MAP_JIT + #define MAP_JIT 0x800 + #endif +#endif +#if !defined(_MSC_VER) || (_MSC_VER >= 1600) + #include +#endif + +// MFD_CLOEXEC defined only linux 3.17 or later. +// Android wraps the memfd_create syscall from API version 30. +#if !defined(MFD_CLOEXEC) || (defined(__ANDROID__) && __ANDROID_API__ < 30) + #undef XBYAK_USE_MEMFD +#endif + +#if defined(_WIN64) || defined(__MINGW64__) || (defined(__CYGWIN__) && defined(__x86_64__)) + #define XBYAK64_WIN +#elif defined(__x86_64__) + #define XBYAK64_GCC +#endif +#if !defined(XBYAK64) && !defined(XBYAK32) + #if defined(XBYAK64_GCC) || defined(XBYAK64_WIN) + #define XBYAK64 + #else + #define XBYAK32 + #endif +#endif + +#if (__cplusplus >= 201103) || (defined(_MSC_VER) && _MSC_VER >= 1900) + #undef XBYAK_TLS + #define XBYAK_TLS thread_local + #define XBYAK_VARIADIC_TEMPLATE + #define XBYAK_NOEXCEPT noexcept + #define XBYAK_OVERRIDE override +#else + #define XBYAK_NOEXCEPT throw() + #define XBYAK_OVERRIDE +#endif + +// require c++14 or later +// Visual Studio 2017 version 15.0 or later +// g++-6 or later +#if ((__cplusplus >= 201402L) && !(!defined(__clang__) && defined(__GNUC__) && (__GNUC__ <= 5))) || (defined(_MSC_VER) && _MSC_VER >= 1910) + #define XBYAK_CONSTEXPR constexpr +#else + #define XBYAK_CONSTEXPR +#endif + +#ifdef _MSC_VER + #pragma warning(push) + #pragma warning(disable : 4514) /* remove inline function */ + #pragma warning(disable : 4786) /* identifier is too long */ + #pragma warning(disable : 4503) /* name is too long */ + #pragma warning(disable : 4127) /* constant expresison */ +#endif + +// disable -Warray-bounds because it may be a bug of gcc. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104603 +#if defined(__GNUC__) && !defined(__clang__) + #define XBYAK_DISABLE_WARNING_ARRAY_BOUNDS + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Warray-bounds" +#endif + +// Define this macro as 0 to disable strict checking of memory operand and register size matching. +// This macro may be removed in future versions. +#ifndef XBYAK_STRICT_CHECK_MEM_REG_SIZE + #define XBYAK_STRICT_CHECK_MEM_REG_SIZE 1 +#endif + +namespace Xbyak { + +enum { + DEFAULT_MAX_CODE_SIZE = 4096, + VERSION = 0x7300 /* 0xABCD = A.BC(.D) */ +}; + +#ifndef MIE_INTEGER_TYPE_DEFINED +#define MIE_INTEGER_TYPE_DEFINED +// for backward compatibility +typedef uint64_t uint64; +typedef int64_t sint64; +typedef uint32_t uint32; +typedef uint16_t uint16; +typedef uint8_t uint8; +#endif + +#ifndef MIE_ALIGN + #ifdef _MSC_VER + #define MIE_ALIGN(x) __declspec(align(x)) + #else + #define MIE_ALIGN(x) __attribute__((aligned(x))) + #endif +#endif +#ifndef MIE_PACK // for shufps + #define MIE_PACK(x, y, z, w) ((x) * 64 + (y) * 16 + (z) * 4 + (w)) +#endif + +enum { + ERR_NONE = 0, + ERR_BAD_ADDRESSING, + ERR_CODE_IS_TOO_BIG, + ERR_BAD_SCALE, + ERR_ESP_CANT_BE_INDEX, + ERR_BAD_COMBINATION, + ERR_BAD_SIZE_OF_REGISTER, + ERR_IMM_IS_TOO_BIG, + ERR_BAD_ALIGN, + ERR_LABEL_IS_REDEFINED, + ERR_LABEL_IS_TOO_FAR, + ERR_LABEL_IS_NOT_FOUND, + ERR_CODE_ISNOT_COPYABLE, + ERR_BAD_PARAMETER, + ERR_CANT_PROTECT, + ERR_CANT_USE_64BIT_DISP, + ERR_OFFSET_IS_TOO_BIG, + ERR_MEM_SIZE_IS_NOT_SPECIFIED, + ERR_BAD_MEM_SIZE, + ERR_BAD_ST_COMBINATION, + ERR_OVER_LOCAL_LABEL, // not used + ERR_UNDER_LOCAL_LABEL, + ERR_CANT_ALLOC, + ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW, + ERR_BAD_PROTECT_MODE, + ERR_BAD_PNUM, + ERR_BAD_TNUM, + ERR_BAD_VSIB_ADDRESSING, + ERR_CANT_CONVERT, + ERR_LABEL_ISNOT_SET_BY_L, + ERR_LABEL_IS_ALREADY_SET_BY_L, + ERR_BAD_LABEL_STR, + ERR_MUNMAP, + ERR_OPMASK_IS_ALREADY_SET, + ERR_ROUNDING_IS_ALREADY_SET, + ERR_K0_IS_INVALID, + ERR_EVEX_IS_INVALID, + ERR_SAE_IS_INVALID, + ERR_ER_IS_INVALID, + ERR_INVALID_BROADCAST, + ERR_INVALID_OPMASK_WITH_MEMORY, + ERR_INVALID_ZERO, + ERR_INVALID_RIP_IN_AUTO_GROW, + ERR_INVALID_MIB_ADDRESS, + ERR_X2APIC_IS_NOT_SUPPORTED, + ERR_NOT_SUPPORTED, + ERR_SAME_REGS_ARE_INVALID, + ERR_INVALID_NF, + ERR_INVALID_ZU, + ERR_CANT_USE_REX2, + ERR_INVALID_DFV, + ERR_INVALID_REG_IDX, + ERR_BAD_ENCODING_MODE, + ERR_CANT_USE_ABCDH, + ERR_INTERNAL // Put it at last. +}; + +inline const char *ConvertErrorToString(int err) +{ + static const char *errTbl[] = { + "none", + "bad addressing", + "code is too big", + "bad scale", + "esp can't be index", + "bad combination", + "bad size of register", + "imm is too big", + "bad align", + "label is redefined", + "label is too far", + "label is not found", + "code is not copyable", + "bad parameter", + "can't protect", + "can't use 64bit disp(use (void*))", + "offset is too big", + "MEM size is not specified", + "bad mem size", + "bad st combination", + "over local label", + "under local label", + "can't alloc", + "T_SHORT is not supported in AutoGrow", + "bad protect mode", + "bad pNum", + "bad tNum", + "bad vsib addressing", + "can't convert", + "label is not set by L()", + "label is already set by L()", + "bad label string", + "err munmap", + "opmask is already set", + "rounding is already set", + "k0 is invalid", + "evex is invalid", + "sae(suppress all exceptions) is invalid", + "er(embedded rounding) is invalid", + "invalid broadcast", + "invalid opmask with memory", + "invalid zero", + "invalid rip in AutoGrow", + "invalid mib address", + "x2APIC is not supported", + "not supported", + "same regs are invalid", + "invalid NF", + "invalid ZU", + "can't use rex2", + "invalid dfv", + "invalid reg index", + "bad encoding mode", + "can't use [abcd]h with rex", + "internal error" + }; + assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl)); + return err <= ERR_INTERNAL ? errTbl[err] : "unknown err"; +} + +#ifdef XBYAK_NO_EXCEPTION +namespace local { + +inline int& GetErrorRef() { + static XBYAK_TLS int err = 0; + return err; +} + +inline void SetError(int err) { + if (local::GetErrorRef()) return; // keep the first err code + local::GetErrorRef() = err; +} + +} // local + +inline void ClearError() { + local::GetErrorRef() = 0; +} +inline int GetError() { return Xbyak::local::GetErrorRef(); } + +#define XBYAK_THROW(err) { Xbyak::local::SetError(err); return; } +#define XBYAK_THROW_RET(err, r) { Xbyak::local::SetError(err); return r; } + +#else +class Error : public std::exception { + int err_; +public: + explicit Error(int err) : err_(err) + { + if (err_ < 0 || err_ > ERR_INTERNAL) { + err_ = ERR_INTERNAL; + } + } + operator int() const { return err_; } + const char *what() const XBYAK_NOEXCEPT XBYAK_OVERRIDE + { + return ConvertErrorToString(err_); + } +}; + +// dummy functions +inline void ClearError() { } +inline int GetError() { return 0; } + +inline const char *ConvertErrorToString(const Error& err) +{ + return err.what(); +} + +#define XBYAK_THROW(err) { throw Error(err); } +#define XBYAK_THROW_RET(err, r) { throw Error(err); } + +#endif + +inline void *AlignedMalloc(size_t size, size_t alignment) +{ +#ifdef __MINGW32__ + return __mingw_aligned_malloc(size, alignment); +#elif defined(_WIN32) + return _aligned_malloc(size, alignment); +#else + void *p; + int ret = posix_memalign(&p, alignment, size); + return (ret == 0) ? p : 0; +#endif +} + +inline void AlignedFree(void *p) +{ +#ifdef __MINGW32__ + __mingw_aligned_free(p); +#elif defined(_MSC_VER) + _aligned_free(p); +#else + free(p); +#endif +} + +namespace inner { + +#ifdef _WIN32 +struct SystemInfo { + SYSTEM_INFO info; + SystemInfo() + { + GetSystemInfo(&info); + } +}; +#endif +//static const size_t ALIGN_PAGE_SIZE = 4096; +inline size_t getPageSize() +{ +#ifdef _WIN32 + static const SystemInfo si; + return si.info.dwPageSize; +#else +#ifdef __GNUC__ + static const long pageSize = sysconf(_SC_PAGESIZE); + if (pageSize > 0) { + return (size_t)pageSize; + } +#endif + return 4096; +#endif +} + +inline bool IsInDisp8(uint32_t x) { return 0xFFFFFF80 <= x || x <= 0x7F; } +inline bool IsInInt32(uint64_t x) { return ~uint64_t(0x7fffffffu) <= x || x <= 0x7FFFFFFFU; } + +inline uint32_t VerifyInInt32(uint64_t x) +{ +#if defined(XBYAK64) && !defined(__ILP32__) + if (!IsInInt32(x)) XBYAK_THROW_RET(ERR_OFFSET_IS_TOO_BIG, 0) +#endif + return static_cast(x); +} + +enum LabelMode { + LasIs, // as is + Labs, // absolute + LaddTop // (addr + top) for mov(reg, label) with AutoGrow +}; + +enum AddressMode { + M_none, + M_ModRM, + M_64bitDisp, + M_rip, + M_ripAddr +}; + +} // inner + +/* + custom allocator +*/ +struct Allocator { + explicit Allocator(const std::string& = "") {} // same interface with MmapAllocator + virtual uint8_t *alloc(size_t size) { return reinterpret_cast(AlignedMalloc(size, inner::getPageSize())); } + virtual void free(uint8_t *p) { AlignedFree(p); } + virtual ~Allocator() {} + /* override to return false if you call protect() manually */ + virtual bool useProtect() const { return true; } +}; + +#ifdef XBYAK_USE_MMAP_ALLOCATOR +#ifdef XBYAK_USE_MAP_JIT +namespace util { + +inline int getMacOsVersionPure() +{ + char buf[64]; + size_t size = sizeof(buf); + int err = sysctlbyname("kern.osrelease", buf, &size, NULL, 0); + if (err != 0) return 0; + char *endp; + int major = strtol(buf, &endp, 10); + if (*endp != '.') return 0; + return major; +} + +inline int getMacOsVersion() +{ + static const int version = getMacOsVersionPure(); + return version; +} + +} // util +#endif +class MmapAllocator : public Allocator { + struct Allocation { + size_t size; +#if defined(XBYAK_USE_MEMFD) + // fd_ is only used with XBYAK_USE_MEMFD. We keep the file open + // during the lifetime of each allocation in order to support + // checkpoint/restore by unprivileged users. + int fd; +#endif + }; + const std::string name_; // only used with XBYAK_USE_MEMFD + typedef XBYAK_STD_UNORDERED_MAP AllocationList; + AllocationList allocList_; +public: + explicit MmapAllocator(const std::string& name = "xbyak") : name_(name) {} + uint8_t *alloc(size_t size) XBYAK_OVERRIDE + { + const size_t alignedSizeM1 = inner::getPageSize() - 1; + size = (size + alignedSizeM1) & ~alignedSizeM1; +#if defined(MAP_ANONYMOUS) + int mode = MAP_PRIVATE | MAP_ANONYMOUS; +#elif defined(MAP_ANON) + int mode = MAP_PRIVATE | MAP_ANON; +#else + #error "not supported" +#endif +#if defined(XBYAK_USE_MAP_JIT) + const int mojaveVersion = 18; + if (util::getMacOsVersion() >= mojaveVersion) mode |= MAP_JIT; +#endif + int fd = -1; +#if defined(XBYAK_USE_MEMFD) + fd = memfd_create(name_.c_str(), MFD_CLOEXEC); + if (fd != -1) { + mode = MAP_SHARED; + if (ftruncate(fd, size) != 0) { + close(fd); + XBYAK_THROW_RET(ERR_CANT_ALLOC, 0) + } + } +#endif + void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, mode, fd, 0); + if (p == MAP_FAILED) { + if (fd != -1) close(fd); + XBYAK_THROW_RET(ERR_CANT_ALLOC, 0) + } + assert(p); + Allocation &alloc = allocList_[(uintptr_t)p]; + alloc.size = size; +#if defined(XBYAK_USE_MEMFD) + alloc.fd = fd; +#endif + return (uint8_t*)p; + } + void free(uint8_t *p) XBYAK_OVERRIDE + { + if (p == 0) return; + AllocationList::iterator i = allocList_.find((uintptr_t)p); + if (i == allocList_.end()) XBYAK_THROW(ERR_BAD_PARAMETER) + if (munmap((void*)i->first, i->second.size) < 0) XBYAK_THROW(ERR_MUNMAP) +#if defined(XBYAK_USE_MEMFD) + if (i->second.fd != -1) close(i->second.fd); +#endif + allocList_.erase(i); + } +}; +#else +typedef Allocator MmapAllocator; +#endif + +class Address; +class Reg; + +struct ApxFlagNF {}; +struct ApxFlagZU {}; + +// dfv (default flags value) is or operation of these flags +static const int T_of = 8; +static const int T_sf = 4; +static const int T_zf = 2; +static const int T_cf = 1; + +class Operand { + static const uint8_t EXT8BIT = 0x20; + unsigned int idx_:6; // 0..31 + EXT8BIT = 1 if spl/bpl/sil/dil + unsigned int kind_:10; + unsigned int bit_:14; +protected: + unsigned int zero_:1; + unsigned int mask_:3; + unsigned int rounding_:3; + unsigned int NF_:1; + unsigned int ZU_:1; // ND=ZU + void setIdx(int idx) { idx_ = idx; } +public: + enum Kind { + NONE = 0, + MEM = 1 << 0, + REG = 1 << 1, + MMX = 1 << 2, + FPU = 1 << 3, + XMM = 1 << 4, + YMM = 1 << 5, + ZMM = 1 << 6, + OPMASK = 1 << 7, + BNDREG = 1 << 8, + TMM = 1 << 9 + }; + enum Code { +#ifdef XBYAK64 + RAX = 0, RCX, RDX, RBX, RSP, RBP, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15, + R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, + R8D = 8, R9D, R10D, R11D, R12D, R13D, R14D, R15D, + R16D, R17D, R18D, R19D, R20D, R21D, R22D, R23D, R24D, R25D, R26D, R27D, R28D, R29D, R30D, R31D, + R8W = 8, R9W, R10W, R11W, R12W, R13W, R14W, R15W, + R16W, R17W, R18W, R19W, R20W, R21W, R22W, R23W, R24W, R25W, R26W, R27W, R28W, R29W, R30W, R31W, + R8B = 8, R9B, R10B, R11B, R12B, R13B, R14B, R15B, + R16B, R17B, R18B, R19B, R20B, R21B, R22B, R23B, R24B, R25B, R26B, R27B, R28B, R29B, R30B, R31B, + SPL = 4, BPL, SIL, DIL, +#endif + EAX = 0, ECX, EDX, EBX, ESP, EBP, ESI, EDI, + AX = 0, CX, DX, BX, SP, BP, SI, DI, + AL = 0, CL, DL, BL, AH, CH, DH, BH + }; + XBYAK_CONSTEXPR Operand() : idx_(0), kind_(0), bit_(0), zero_(0), mask_(0), rounding_(0), NF_(0), ZU_(0) { } + XBYAK_CONSTEXPR Operand(int idx, Kind kind, int bit, bool ext8bit = 0) + : idx_(static_cast(idx | (ext8bit ? EXT8BIT : 0))) + , kind_(kind) + , bit_(bit) + , zero_(0), mask_(0), rounding_(0), NF_(0), ZU_(0) + { + assert((bit_ & (bit_ - 1)) == 0); // bit must be power of two + } + XBYAK_CONSTEXPR Kind getKind() const { return static_cast(kind_); } + XBYAK_CONSTEXPR int getIdx() const { return idx_ & (EXT8BIT - 1); } + XBYAK_CONSTEXPR bool hasIdxBit(int bit) const { return idx_ & (1<= 4) goto ERR; +#else + if (idx >= 32) goto ERR; + if (4 <= idx && idx < 8) idx |= EXT8BIT; +#endif + break; + case 16: + case 32: + case 64: +#ifdef XBYAK32 + if (idx >= 16) goto ERR; +#else + if (idx >= 32) goto ERR; +#endif + break; + case 128: kind = XMM; break; + case 256: kind = YMM; break; + case 512: kind = ZMM; break; + case 8192: kind = TMM; break; + } + idx_ = idx; + kind_ = kind; + bit_ = bit; + if (bit >= 128) return; // keep mask_ and rounding_ + mask_ = 0; + rounding_ = 0; + return; + } +ERR: + XBYAK_THROW(ERR_CANT_CONVERT) +} + +class Label; + +struct Reg8; +struct Reg16; +struct Reg32; +struct Xmm; +struct Ymm; +struct Zmm; +#ifdef XBYAK64 +struct Reg64; +#endif +class Reg : public Operand { +public: + XBYAK_CONSTEXPR Reg() { } + XBYAK_CONSTEXPR Reg(int idx, Kind kind, int bit = 0, bool ext8bit = false) : Operand(idx, kind, bit, ext8bit) { } + // convert to Reg8/Reg16/Reg32/Reg64/XMM/YMM/ZMM + Reg changeBit(int bit) const { Reg r(*this); r.setBit(bit); return r; } + Reg8 cvt8() const; + Reg16 cvt16() const; + Reg32 cvt32() const; +#ifdef XBYAK64 + Reg64 cvt64() const; +#endif + Xmm cvt128() const; + Ymm cvt256() const; + Zmm cvt512() const; + Reg operator|(const ApxFlagNF&) const { Reg r(*this); r.setNF(); return r; } + Reg operator|(const ApxFlagZU&) const { Reg r(*this); r.setZU(); return r; } +}; + +inline const Reg& Operand::getReg() const +{ + assert(!isMEM()); + return static_cast(*this); +} + +struct Reg8 : public Reg { + explicit XBYAK_CONSTEXPR Reg8(int idx = 0, bool ext8bit = false) : Reg(idx, Operand::REG, 8, ext8bit) { } +}; + +struct Reg16 : public Reg { + explicit XBYAK_CONSTEXPR Reg16(int idx = 0) : Reg(idx, Operand::REG, 16) { } +}; + +struct Mmx : public Reg { + explicit XBYAK_CONSTEXPR Mmx(int idx = 0, Kind kind = Operand::MMX, int bit = 64) : Reg(idx, kind, bit) { } +}; + +struct EvexModifierRounding { + enum { + T_RN_SAE = 1, + T_RD_SAE = 2, + T_RU_SAE = 3, + T_RZ_SAE = 4, + T_SAE = 5 + }; + explicit XBYAK_CONSTEXPR EvexModifierRounding(int rounding) : rounding(rounding) {} + int rounding; +}; +struct EvexModifierZero{ XBYAK_CONSTEXPR EvexModifierZero() {}}; + +struct Xmm : public Mmx { + explicit XBYAK_CONSTEXPR Xmm(int idx = 0, Kind kind = Operand::XMM, int bit = 128) : Mmx(idx, kind, bit) { } + XBYAK_CONSTEXPR Xmm(Kind kind, int idx) : Mmx(idx, kind, kind == XMM ? 128 : kind == YMM ? 256 : 512) { } + Xmm operator|(const EvexModifierRounding& emr) const { Xmm r(*this); r.setRounding(emr.rounding); return r; } + Xmm copyAndSetIdx(int idx) const { Xmm ret(*this); ret.setIdx(idx); return ret; } + Xmm copyAndSetKind(Operand::Kind kind) const { Xmm ret(*this); ret.setKind(kind); return ret; } +}; + +struct Ymm : public Xmm { + explicit XBYAK_CONSTEXPR Ymm(int idx = 0, Kind kind = Operand::YMM, int bit = 256) : Xmm(idx, kind, bit) { } + Ymm operator|(const EvexModifierRounding& emr) const { Ymm r(*this); r.setRounding(emr.rounding); return r; } +}; + +struct Zmm : public Ymm { + explicit XBYAK_CONSTEXPR Zmm(int idx = 0) : Ymm(idx, Operand::ZMM, 512) { } + Zmm operator|(const EvexModifierRounding& emr) const { Zmm r(*this); r.setRounding(emr.rounding); return r; } +}; + +#ifdef XBYAK64 +struct Tmm : public Reg { + explicit XBYAK_CONSTEXPR Tmm(int idx = 0, Kind kind = Operand::TMM, int bit = 8192) : Reg(idx, kind, bit) { } +}; +#endif + +struct Opmask : public Reg { + explicit XBYAK_CONSTEXPR Opmask(int idx = 0) : Reg(idx, Operand::OPMASK, 64) {} +}; + +struct BoundsReg : public Reg { + explicit XBYAK_CONSTEXPR BoundsReg(int idx = 0) : Reg(idx, Operand::BNDREG, 128) {} +}; + +templateT operator|(const T& x, const Opmask& k) { T r(x); r.setOpmaskIdx(k.getIdx()); return r; } +templateT operator|(const T& x, const EvexModifierZero&) { T r(x); r.setZero(); return r; } +templateT operator|(const T& x, const EvexModifierRounding& emr) { T r(x); r.setRounding(emr.rounding); return r; } + +struct Fpu : public Reg { + explicit XBYAK_CONSTEXPR Fpu(int idx = 0) : Reg(idx, Operand::FPU, 32) { } +}; + +struct Reg32e : public Reg { + explicit XBYAK_CONSTEXPR Reg32e(int idx, int bit) : Reg(idx, Operand::REG, bit) {} + Reg32e operator|(const ApxFlagNF&) const { Reg32e r(*this); r.setNF(); return r; } + Reg32e operator|(const ApxFlagZU&) const { Reg32e r(*this); r.setZU(); return r; } +}; +struct Reg32 : public Reg32e { + explicit XBYAK_CONSTEXPR Reg32(int idx = 0) : Reg32e(idx, 32) {} +}; +#ifdef XBYAK64 +struct Reg64 : public Reg32e { + explicit XBYAK_CONSTEXPR Reg64(int idx = 0) : Reg32e(idx, 64) {} +}; +struct RegRip { +}; +#endif + +inline Reg8 Reg::cvt8() const +{ + Reg r = changeBit(8); return Reg8(r.getIdx(), r.isExt8bit()); +} + +inline Reg16 Reg::cvt16() const +{ + return Reg16(changeBit(16).getIdx()); +} + +inline Reg32 Reg::cvt32() const +{ + return Reg32(changeBit(32).getIdx()); +} + +#ifdef XBYAK64 +inline Reg64 Reg::cvt64() const +{ + return Reg64(changeBit(64).getIdx()); +} +#endif + +inline Xmm Reg::cvt128() const +{ + return Xmm(changeBit(128).getIdx()); +} + +inline Ymm Reg::cvt256() const +{ + return Ymm(changeBit(256).getIdx()); +} + +inline Zmm Reg::cvt512() const +{ + return Zmm(changeBit(512).getIdx()); +} + +#ifndef XBYAK_DISABLE_SEGMENT +// not derived from Reg +class Segment { + int idx_; +public: + enum { + es, cs, ss, ds, fs, gs + }; + explicit XBYAK_CONSTEXPR Segment(int idx) : idx_(idx) { assert(0 <= idx_ && idx_ < 6); } + int getIdx() const { return idx_; } + const char *toString() const + { + static const char tbl[][3] = { + "es", "cs", "ss", "ds", "fs", "gs" + }; + return tbl[idx_]; + } +}; +#endif + +/* + pattern + [base]? [+index[*scale]]? [+/-disp]* [+label]? + rip [+/-disp]* [+label]? + rip+disp if backward reference then use label.getAddress() + rip+label if forward reference + [&var]?[+/-disp]* +*/ +class RegExp { + friend class Address; +public: +#ifdef XBYAK64 + enum { i32e = 32 | 64 }; +#else + enum { i32e = 32 }; +#endif + XBYAK_CONSTEXPR RegExp() : scale_(0), disp_(0), label_(0), rip_(false), setLabel_(false) { } + XBYAK_CONSTEXPR RegExp(size_t disp) : scale_(0), disp_(disp), label_(0), rip_(false), setLabel_(false) { } + XBYAK_CONSTEXPR RegExp(const Reg& r, int scale = 1) + : scale_(scale) + , disp_(0) + , label_(0) + , rip_(false) + , setLabel_(false) + { + if (!r.isREG(i32e) && !r.is(Reg::XMM|Reg::YMM|Reg::ZMM|Reg::TMM)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) + if (scale == 0) return; + if (scale != 1 && scale != 2 && scale != 4 && scale != 8) XBYAK_THROW(ERR_BAD_SCALE) + if (r.getBit() >= 128 || scale != 1) { // xmm/ymm is always index + index_ = r; + } else { + base_ = r; + } + } + RegExp(Label& label); + + RegExp(const void *addr) + : scale_(1) + , disp_(size_t(addr)) + , label_(0) + , rip_(false) + , setLabel_(true) + { + } +#ifdef XBYAK64 + RegExp(const RegRip& /*rip*/) + : scale_(0) + , disp_(0) + , label_(0) + , rip_(true) + , setLabel_(false) + { + } +#endif + bool isVsib(int bit = 128 | 256 | 512) const { return index_.isBit(bit); } + RegExp optimize() const + { + RegExp exp = *this; + // [reg * 2] => [reg + reg] + if (index_.isBit(i32e) && !base_.getBit() && scale_ == 2) { + exp.base_ = index_; + exp.scale_ = 1; + } + return exp; + } + bool operator==(const RegExp& rhs) const + { + return base_ == rhs.base_ && index_ == rhs.index_ && disp_ == rhs.disp_ && scale_ == rhs.scale_; + } + const Reg& getBase() const { return base_; } + const Reg& getIndex() const { return index_; } + const Label *getLabel() const { return label_; } + bool isOnlyDisp() const { return !base_.getBit() && !index_.getBit(); } // for mov eax + int getScale() const { return scale_; } + size_t getDisp() const { return disp_; } + XBYAK_CONSTEXPR void verify() const + { + if (base_.getBit() >= 128) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) + if (index_.getBit() && index_.getBit() <= 64) { + if (index_.getIdx() == Operand::ESP) XBYAK_THROW(ERR_ESP_CANT_BE_INDEX) + if (base_.getBit() && base_.getBit() != index_.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) + } + } + friend RegExp operator+(const RegExp& a, const RegExp& b); + friend RegExp operator-(const RegExp& e, size_t disp); +private: + /* + [base_ + index_ * scale_ + disp_] + base : Reg32e, index : Reg32e(w/o esp), Xmm, Ymm + */ + Reg base_; + Reg index_; + int scale_; + size_t disp_; // absolute address + Label *label_; + bool rip_; + bool setLabel_; // disp_ contains the address of label +}; + +inline RegExp operator+(const RegExp& a, const RegExp& b) +{ + if (a.index_.getBit() && b.index_.getBit()) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp()) + if (a.label_ && b.label_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp()) + if (b.rip_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp()) + if (a.rip_ && !b.isOnlyDisp()) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp()) + if (a.setLabel_ && b.setLabel_) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp()) + RegExp ret = a; + if (ret.label_ == 0) ret.label_ = b.label_; + if (ret.setLabel_ == 0) ret.setLabel_ = b.setLabel_; + if (!ret.index_.getBit()) { ret.index_ = b.index_; ret.scale_ = b.scale_; } + if (b.base_.getBit()) { + if (ret.base_.getBit()) { + if (ret.index_.getBit()) XBYAK_THROW_RET(ERR_BAD_ADDRESSING, RegExp()) + // base + base => base + index * 1 + ret.index_ = b.base_; + // [reg + esp] => [esp + reg] + if (ret.index_.getIdx() == Operand::ESP) std::swap(ret.base_, ret.index_); + ret.scale_ = 1; + } else { + ret.base_ = b.base_; + } + } + ret.disp_ += b.disp_; + return ret; +} +inline RegExp operator*(const Reg& r, int scale) +{ + return RegExp(r, scale); +} +inline RegExp operator*(int scale, const Reg& r) +{ + return r * scale; +} +// backward compatibility for eax+0 +inline RegExp operator+(const RegExp& a, size_t b) { return a + RegExp(b); } + +inline RegExp operator-(const RegExp& e, size_t disp) +{ + RegExp ret = e; + ret.disp_ -= disp; + return ret; +} + +// 2nd parameter for constructor of CodeArray(maxSize, userPtr, alloc) +void *const AutoGrow = (void*)1; //-V566 +void *const DontSetProtectRWE = (void*)2; //-V566 + +class CodeArray { + enum Type { + USER_BUF = 1, // use userPtr(non alignment, non protect) + ALLOC_BUF, // use new(alignment, protect) + AUTO_GROW // automatically move and grow memory if necessary + }; + CodeArray(const CodeArray& rhs); + void operator=(const CodeArray&); + bool isAllocType() const { return type_ == ALLOC_BUF || type_ == AUTO_GROW; } + struct AddrInfo { + size_t codeOffset; // position to write + size_t jmpAddr; // value to write + int jmpSize; // size of jmpAddr + inner::LabelMode mode; + AddrInfo(size_t _codeOffset, size_t _jmpAddr, int _jmpSize, inner::LabelMode _mode) + : codeOffset(_codeOffset), jmpAddr(_jmpAddr), jmpSize(_jmpSize), mode(_mode) {} + uint64_t getVal(const uint8_t *top) const + { + uint64_t disp = (mode == inner::LaddTop) ? jmpAddr + size_t(top) : (mode == inner::LasIs) ? jmpAddr : jmpAddr - size_t(top); + if (jmpSize == 4) disp = inner::VerifyInInt32(disp); + return disp; + } + }; + typedef std::list AddrInfoList; + AddrInfoList addrInfoList_; + const Type type_; +#ifdef XBYAK_USE_MMAP_ALLOCATOR + MmapAllocator defaultAllocator_; +#else + Allocator defaultAllocator_; +#endif + Allocator *alloc_; +protected: + size_t maxSize_; + uint8_t *top_; + size_t size_; + bool isCalledCalcJmpAddress_; + + bool useProtect() const { return alloc_->useProtect(); } + /* + allocate new memory and copy old data to the new area + */ + void growMemory() + { + const size_t newSize = (std::max)(DEFAULT_MAX_CODE_SIZE, maxSize_ * 2); + uint8_t *newTop = alloc_->alloc(newSize); + if (newTop == 0) XBYAK_THROW(ERR_CANT_ALLOC) + for (size_t i = 0; i < size_; i++) newTop[i] = top_[i]; + alloc_->free(top_); + top_ = newTop; + maxSize_ = newSize; + } + /* + calc jmp address for AutoGrow mode + */ + void calcJmpAddress() + { + if (isCalledCalcJmpAddress_) return; + for (AddrInfoList::const_iterator i = addrInfoList_.begin(), ie = addrInfoList_.end(); i != ie; ++i) { + uint64_t disp = i->getVal(top_); + rewrite(i->codeOffset, disp, i->jmpSize); + } + isCalledCalcJmpAddress_ = true; + } +public: + enum ProtectMode { + PROTECT_RW = 0, // read/write + PROTECT_RWE = 1, // read/write/exec + PROTECT_RE = 2 // read/exec + }; + explicit CodeArray(size_t maxSize, void *userPtr = 0, Allocator *allocator = 0) + : type_(userPtr == AutoGrow ? AUTO_GROW : (userPtr == 0 || userPtr == DontSetProtectRWE) ? ALLOC_BUF : USER_BUF) + , alloc_(allocator ? allocator : (Allocator*)&defaultAllocator_) + , maxSize_(maxSize) + , top_(type_ == USER_BUF ? reinterpret_cast(userPtr) : alloc_->alloc((std::max)(maxSize, 1))) + , size_(0) + , isCalledCalcJmpAddress_(false) + { + if (maxSize_ > 0 && top_ == 0) XBYAK_THROW(ERR_CANT_ALLOC) + if ((type_ == ALLOC_BUF && userPtr != DontSetProtectRWE && useProtect()) && !setProtectMode(PROTECT_RWE, false)) { + alloc_->free(top_); + XBYAK_THROW(ERR_CANT_PROTECT) + } + } + virtual ~CodeArray() + { + if (isAllocType()) { + if (useProtect()) setProtectModeRW(false); + alloc_->free(top_); + } + } + bool setProtectMode(ProtectMode mode, bool throwException = true) + { + bool isOK = protect(top_, maxSize_, mode); + if (isOK) return true; + if (throwException) XBYAK_THROW_RET(ERR_CANT_PROTECT, false) + return false; + } + bool setProtectModeRE(bool throwException = true) { return setProtectMode(PROTECT_RE, throwException); } + bool setProtectModeRW(bool throwException = true) { return setProtectMode(PROTECT_RW, throwException); } + void resetSize() + { + size_ = 0; + addrInfoList_.clear(); + isCalledCalcJmpAddress_ = false; + } + void db(int code) + { + if (size_ >= maxSize_) { + if (type_ == AUTO_GROW) { + growMemory(); + } else { + XBYAK_THROW(ERR_CODE_IS_TOO_BIG) + } + } + top_[size_++] = static_cast(code); + } + void db(const uint8_t *code, size_t codeSize) + { + for (size_t i = 0; i < codeSize; i++) db(code[i]); + } + void db(uint64_t code, size_t codeSize) + { + if (codeSize > 8) XBYAK_THROW(ERR_BAD_PARAMETER) + for (size_t i = 0; i < codeSize; i++) db(static_cast(code >> (i * 8))); + } + void dw(uint32_t code) { db(code, 2); } + void dd(uint32_t code) { db(code, 4); } + void dq(uint64_t code) { db(code, 8); } + const uint8_t *getCode() const { return top_; } + template + const F getCode() const { return reinterpret_cast(top_); } + const uint8_t *getCurr() const { return &top_[size_]; } + template + const F getCurr() const { return reinterpret_cast(&top_[size_]); } + size_t getSize() const { return size_; } + void setSize(size_t size) + { + if (size > maxSize_) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG) + size_ = size; + } + void dump() const + { + const uint8_t *p = getCode(); + size_t bufSize = getSize(); + size_t remain = bufSize; + for (int i = 0; i < 4; i++) { + size_t disp = 16; + if (remain < 16) { + disp = remain; + } + for (size_t j = 0; j < 16; j++) { + if (j < disp) { + printf("%02X", p[i * 16 + j]); + } + } + putchar('\n'); + remain -= disp; + if (remain == 0) { + break; + } + } + } + /* + @param offset [in] offset from top + @param disp [in] offset from the next of jmp + @param size [in] write size(1, 2, 4, 8) + */ + void rewrite(size_t offset, uint64_t disp, size_t size) + { + assert(offset < maxSize_); + if (size != 1 && size != 2 && size != 4 && size != 8) XBYAK_THROW(ERR_BAD_PARAMETER) + uint8_t *const data = top_ + offset; + for (size_t i = 0; i < size; i++) { + data[i] = static_cast(disp >> (i * 8)); + } + } + void save(size_t offset, size_t val, int size, inner::LabelMode mode) + { + addrInfoList_.push_back(AddrInfo(offset, val, size, mode)); + } + bool isAutoGrow() const { return type_ == AUTO_GROW; } + bool isCalledCalcJmpAddress() const { return isCalledCalcJmpAddress_; } + /** + change exec permission of memory + @param addr [in] buffer address + @param size [in] buffer size + @param protectMode [in] mode(RW/RWE/RE) + @return true(success), false(failure) + */ + static inline bool protect(const void *addr, size_t size, int protectMode) + { +#if defined(_WIN32) + const DWORD c_rw = PAGE_READWRITE; + const DWORD c_rwe = PAGE_EXECUTE_READWRITE; + const DWORD c_re = PAGE_EXECUTE_READ; + DWORD mode; +#else + const int c_rw = PROT_READ | PROT_WRITE; + const int c_rwe = PROT_READ | PROT_WRITE | PROT_EXEC; + const int c_re = PROT_READ | PROT_EXEC; + int mode; +#endif + switch (protectMode) { + case PROTECT_RW: mode = c_rw; break; + case PROTECT_RWE: mode = c_rwe; break; + case PROTECT_RE: mode = c_re; break; + default: + return false; + } +#if defined(_WIN32) + DWORD oldProtect; + return VirtualProtect(const_cast(addr), size, mode, &oldProtect) != 0; +#elif defined(__GNUC__) + size_t pageSize = sysconf(_SC_PAGESIZE); + size_t iaddr = reinterpret_cast(addr); + size_t roundAddr = iaddr & ~(pageSize - static_cast(1)); + return mprotect(reinterpret_cast(roundAddr), size + (iaddr - roundAddr), mode) == 0; +#else + return true; +#endif + } + /** + get aligned memory pointer + @param addr [in] address + @param alignedSize [in] power of two + @return aligned addr by alingedSize + */ + static inline uint8_t *getAlignedAddress(uint8_t *addr, size_t alignedSize = 16) + { + return reinterpret_cast((reinterpret_cast(addr) + alignedSize - 1) & ~(alignedSize - static_cast(1))); + } +}; + +class Address : public Operand { +public: + XBYAK_CONSTEXPR Address(uint32_t sizeBit, bool broadcast, const RegExp& e) + : Operand(0, MEM, sizeBit), e_(e), label_(e.label_), mode_(), immSize(0), disp8N(0), permitVsib(false), broadcast_(broadcast), optimize_(true) + { + if (e.rip_) { + mode_ = (e.label_ || e.setLabel_) ? inner::M_ripAddr : inner::M_rip; + } else { +#ifdef XBYAK64 + uint64_t disp = e.getDisp(); + if (e.isOnlyDisp() && ((0x80000000 <= disp && disp <= 0xffffffff80000000) || e.getLabel())) { + mode_ = inner::M_64bitDisp; + } else +#endif + { + mode_ = inner::M_ModRM; + } + } + e_.verify(); + } + RegExp getRegExp() const + { + return optimize_ ? e_.optimize() : e_; + } + Address cloneNoOptimize() const { Address addr = *this; addr.optimize_ = false; return addr; } + inner::AddressMode getMode() const { return mode_; } + bool is32bit() const { return e_.getBase().getBit() == 32 || e_.getIndex().getBit() == 32; } + bool isOnlyDisp() const { return e_.isOnlyDisp(); } + size_t getDisp() const { return e_.getDisp(); } + bool is64bitDisp() const { return mode_ == inner::M_64bitDisp; } // for moffset + bool isBroadcast() const { return broadcast_; } + bool hasRex2() const { return e_.getBase().hasRex2() || e_.getIndex().hasRex2(); } + const Label* getLabel() const { return label_; } + bool operator==(const Address& rhs) const + { + return getBit() == rhs.getBit() && e_ == rhs.e_ && label_ == rhs.label_ && mode_ == rhs.mode_ && immSize == rhs.immSize && disp8N == rhs.disp8N && permitVsib == rhs.permitVsib && broadcast_ == rhs.broadcast_ && optimize_ == rhs.optimize_; + } + bool operator!=(const Address& rhs) const { return !operator==(rhs); } + bool isVsib() const { return e_.isVsib(); } +private: + RegExp e_; + const Label* label_; + inner::AddressMode mode_; +public: + int immSize; // the size of immediate value of nmemonics (0, 1, 2, 4) + int disp8N; // 0(normal), 1(force disp32), disp8N = {2, 4, 8} + bool permitVsib; +private: + bool broadcast_; + bool optimize_; +}; + +inline const Address& Operand::getAddress() const +{ + assert(isMEM()); + return static_cast(*this); +} +inline Address Operand::getAddress(int immSize) const +{ + Address addr = getAddress(); + addr.immSize = immSize; + return addr; +} + +inline bool Operand::operator==(const Operand& rhs) const +{ + if (isMEM() && rhs.isMEM()) return this->getAddress() == rhs.getAddress(); + return isEqualIfNotInherited(rhs); +} + +inline XBYAK_CONSTEXPR bool Operand::hasRex2() const +{ + return (isREG() && isExtIdx2()) || (isMEM() && static_cast(*this).hasRex2()); +} + +class AddressFrame { + void operator=(const AddressFrame&); + AddressFrame(const AddressFrame&); +public: + const uint32_t bit_; + const bool broadcast_; + explicit XBYAK_CONSTEXPR AddressFrame(uint32_t bit, bool broadcast = false) : bit_(bit), broadcast_(broadcast) { } + Address operator[](const RegExp& e) const + { + return Address(bit_, broadcast_, e); + } +}; + +struct JmpLabel { + size_t endOfJmp; /* offset from top to the end address of jmp */ + int jmpSize; + inner::LabelMode mode; + size_t disp; // disp for [rip + disp] or [forward ref label + disp] + explicit JmpLabel(size_t endOfJmp = 0, int jmpSize = 0, inner::LabelMode mode = inner::LasIs, size_t disp = 0) + : endOfJmp(endOfJmp), jmpSize(jmpSize), mode(mode), disp(disp) + { + } +}; + +class LabelManager; + +class Label { + mutable LabelManager *mgr; + mutable int id; + friend class LabelManager; +public: + Label() : mgr(0), id(0) {} + Label(const Label& rhs); + Label& operator=(const Label& rhs); + ~Label(); + void clear() { mgr = 0; id = 0; } + int getId() const { return id; } + bool isDefined() const; + const uint8_t *getAddress() const; + + // backward compatibility + static inline std::string toStr(int num) + { + char buf[16]; +#if defined(_MSC_VER) && (_MSC_VER < 1900) + _snprintf_s +#else + snprintf +#endif + (buf, sizeof(buf), ".%08x", num); + return buf; + } +}; + +inline RegExp::RegExp(Label& label) + : scale_(1) + , disp_(0) + , label_(0) + , rip_(false) + , setLabel_(true) +{ + const uint8_t *addr = label.getAddress(); + if (addr) { + disp_ = size_t(addr); + label_ = 0; + } else { + label_ = &label; + } +} + +class LabelManager { + // for string label + struct SlabelVal { + size_t offset; + SlabelVal(size_t offset) : offset(offset) {} + }; + typedef XBYAK_STD_UNORDERED_MAP SlabelDefList; + typedef XBYAK_STD_UNORDERED_MULTIMAP SlabelUndefList; + struct SlabelState { + SlabelDefList defList; + SlabelUndefList undefList; + }; + typedef std::list StateList; + // for Label class + struct ClabelVal { + ClabelVal(size_t offset = 0) : offset(offset), refCount(1) {} + size_t offset; + int refCount; + }; + typedef XBYAK_STD_UNORDERED_MAP ClabelDefList; + typedef XBYAK_STD_UNORDERED_MULTIMAP ClabelUndefList; + typedef XBYAK_STD_UNORDERED_SET LabelPtrList; + + CodeArray *base_; + // global : stateList_.front(), local : stateList_.back() + StateList stateList_; + mutable int labelId_; + ClabelDefList clabelDefList_; + ClabelUndefList clabelUndefList_; + LabelPtrList labelPtrList_; + + int getId(const Label& label) const + { + if (label.id == 0) label.id = labelId_++; + return label.id; + } + template + void define_inner(DefList& defList, UndefList& undefList, const T& labelId, size_t addrOffset) + { + // add label + typename DefList::value_type item(labelId, addrOffset); + std::pair ret = defList.insert(item); + if (!ret.second) XBYAK_THROW(ERR_LABEL_IS_REDEFINED) + // search undefined label + for (;;) { + typename UndefList::iterator itr = undefList.find(labelId); + if (itr == undefList.end()) break; + const JmpLabel *jmp = &itr->second; + const size_t offset = jmp->endOfJmp - jmp->jmpSize; + size_t disp; + if (jmp->mode == inner::LaddTop) { + disp = addrOffset; + } else if (jmp->mode == inner::Labs) { + disp = size_t(base_->getCurr()); + } else { + disp = addrOffset - jmp->endOfJmp + jmp->disp; +#ifdef XBYAK64 + if (jmp->jmpSize <= 4 && !inner::IsInInt32(disp)) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG) +#endif + if (jmp->jmpSize == 1 && !inner::IsInDisp8((uint32_t)disp)) XBYAK_THROW(ERR_LABEL_IS_TOO_FAR) + } + if (jmp->mode != inner::LasIs) { + disp += jmp->disp; + } + if (base_->isAutoGrow()) { + base_->save(offset, disp, jmp->jmpSize, jmp->mode); + } else { + base_->rewrite(offset, disp, jmp->jmpSize); + } + undefList.erase(itr); + } + } + template + bool getOffset_inner(const DefList& defList, size_t *offset, const T& label) const + { + typename DefList::const_iterator i = defList.find(label); + if (i == defList.end()) return false; + *offset = i->second.offset; + return true; + } + friend class Label; + void incRefCount(int id, Label *label) + { + clabelDefList_[id].refCount++; + labelPtrList_.insert(label); + } + void decRefCount(int id, Label *label) + { + labelPtrList_.erase(label); + ClabelDefList::iterator i = clabelDefList_.find(id); + if (i == clabelDefList_.end()) return; + if (i->second.refCount == 1) { + clabelDefList_.erase(id); + } else { + --i->second.refCount; + } + } + template + bool hasUndefinedLabel_inner(const T& list) const + { +#ifndef NDEBUG + for (typename T::const_iterator i = list.begin(); i != list.end(); ++i) { + std::cerr << "undefined label:" << i->first << std::endl; + } +#endif + return !list.empty(); + } + // detach all labels linked to LabelManager + void resetLabelPtrList() + { + for (LabelPtrList::iterator i = labelPtrList_.begin(), ie = labelPtrList_.end(); i != ie; ++i) { + (*i)->clear(); + } + labelPtrList_.clear(); + } +public: + LabelManager() + { + reset(); + } + ~LabelManager() + { + resetLabelPtrList(); + } + void reset() + { + base_ = 0; + labelId_ = 1; + stateList_.clear(); + stateList_.push_back(SlabelState()); + stateList_.push_back(SlabelState()); + clabelDefList_.clear(); + clabelUndefList_.clear(); + resetLabelPtrList(); + } + void enterLocal() + { + stateList_.push_back(SlabelState()); + } + void leaveLocal() + { + if (stateList_.size() <= 2) XBYAK_THROW(ERR_UNDER_LOCAL_LABEL) + if (hasUndefinedLabel_inner(stateList_.back().undefList)) XBYAK_THROW(ERR_LABEL_IS_NOT_FOUND) + stateList_.pop_back(); + } + void set(CodeArray *base) { base_ = base; } + void defineSlabel(std::string label) + { + if (label == "@b" || label == "@f") XBYAK_THROW(ERR_BAD_LABEL_STR) + if (label == "@@") { + SlabelDefList& defList = stateList_.front().defList; + SlabelDefList::iterator i = defList.find("@f"); + if (i != defList.end()) { + defList.erase(i); + label = "@b"; + } else { + i = defList.find("@b"); + if (i != defList.end()) { + defList.erase(i); + } + label = "@f"; + } + } + SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front(); + define_inner(st.defList, st.undefList, label, base_->getSize()); + } + void defineClabel(Label& label) + { + define_inner(clabelDefList_, clabelUndefList_, getId(label), base_->getSize()); + label.mgr = this; + labelPtrList_.insert(&label); + } + void assign(Label& dst, const Label& src) + { + ClabelDefList::const_iterator i = clabelDefList_.find(src.id); + if (i == clabelDefList_.end()) XBYAK_THROW(ERR_LABEL_ISNOT_SET_BY_L) + define_inner(clabelDefList_, clabelUndefList_, dst.id, i->second.offset); + dst.mgr = this; + labelPtrList_.insert(&dst); + } + bool getOffset(size_t *offset, std::string& label) const + { + const SlabelDefList& defList = stateList_.front().defList; + if (label == "@b") { + if (defList.find("@f") != defList.end()) { + label = "@f"; + } else if (defList.find("@b") == defList.end()) { + XBYAK_THROW_RET(ERR_LABEL_IS_NOT_FOUND, false) + } + } else if (label == "@f") { + if (defList.find("@f") != defList.end()) { + label = "@b"; + } + } + const SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front(); + return getOffset_inner(st.defList, offset, label); + } + bool getOffset(size_t *offset, const Label& label) const + { + return getOffset_inner(clabelDefList_, offset, getId(label)); + } + void addUndefinedLabel(const std::string& label, const JmpLabel& jmp) + { + SlabelState& st = *label.c_str() == '.' ? stateList_.back() : stateList_.front(); + st.undefList.insert(SlabelUndefList::value_type(label, jmp)); + } + void addUndefinedLabel(const Label& label, const JmpLabel& jmp) + { + clabelUndefList_.insert(ClabelUndefList::value_type(label.id, jmp)); + } + bool hasUndefSlabel() const + { + for (StateList::const_iterator i = stateList_.begin(), ie = stateList_.end(); i != ie; ++i) { + if (hasUndefinedLabel_inner(i->undefList)) return true; + } + return false; + } + bool hasUndefClabel() const { return hasUndefinedLabel_inner(clabelUndefList_); } + const uint8_t *getCode() const { return base_->getCode(); } + bool isReady() const { return !base_->isAutoGrow() || base_->isCalledCalcJmpAddress(); } + bool isDefined(const Label& label) const { return clabelDefList_.find(label.id) != clabelDefList_.end(); } +}; + +inline bool Label::isDefined() const +{ + return mgr && mgr->isDefined(*this); +} +inline Label::Label(const Label& rhs) +{ + id = rhs.id; + mgr = rhs.mgr; + if (mgr) mgr->incRefCount(id, this); +} +inline Label& Label::operator=(const Label& rhs) +{ + if (id) XBYAK_THROW_RET(ERR_LABEL_IS_ALREADY_SET_BY_L, *this) + id = rhs.id; + mgr = rhs.mgr; + if (mgr) mgr->incRefCount(id, this); + return *this; +} +inline Label::~Label() +{ + if (id && mgr) mgr->decRefCount(id, this); +} +inline const uint8_t* Label::getAddress() const +{ + if (mgr == 0 || !mgr->isReady()) return 0; + size_t offset; + if (!mgr->getOffset(&offset, *this)) return 0; + return mgr->getCode() + offset; +} + +typedef enum { + DefaultEncoding, + VexEncoding, + EvexEncoding, + PreAVX10v2Encoding, + AVX10v2Encoding +} PreferredEncoding; + +class CodeGenerator : public CodeArray { +public: + enum LabelType { + T_SHORT, + T_NEAR, + T_FAR, // far jump + T_AUTO // T_SHORT if possible + }; +private: + CodeGenerator operator=(const CodeGenerator&); // don't call +#ifdef XBYAK64 + enum { i32e = 32 | 64, BIT = 64 }; + static const uint64_t dummyAddr = uint64_t(0x1122334455667788ull); + typedef Reg64 NativeReg; +#else + enum { i32e = 32, BIT = 32 }; + static const size_t dummyAddr = 0x12345678; + typedef Reg32 NativeReg; +#endif + // (XMM, XMM|MEM) + static inline bool isXMM_XMMorMEM(const Operand& op1, const Operand& op2) + { + return op1.isXMM() && (op2.isXMM() || op2.isMEM()); + } + // (MMX, MMX|MEM) or (XMM, XMM|MEM) + static inline bool isXMMorMMX_MEM(const Operand& op1, const Operand& op2) + { + return (op1.isMMX() && (op2.isMMX() || op2.isMEM())) || isXMM_XMMorMEM(op1, op2); + } + // (XMM, MMX|MEM) + static inline bool isXMM_MMXorMEM(const Operand& op1, const Operand& op2) + { + return op1.isXMM() && (op2.isMMX() || op2.isMEM()); + } + // (MMX, XMM|MEM) + static inline bool isMMX_XMMorMEM(const Operand& op1, const Operand& op2) + { + return op1.isMMX() && (op2.isXMM() || op2.isMEM()); + } + // (XMM, REG32|MEM) + static inline bool isXMM_REG32orMEM(const Operand& op1, const Operand& op2) + { + return op1.isXMM() && (op2.isREG(i32e) || op2.isMEM()); + } + // (REG32, XMM|MEM) + static inline bool isREG32_XMMorMEM(const Operand& op1, const Operand& op2) + { + return op1.isREG(i32e) && (op2.isXMM() || op2.isMEM()); + } + // (REG32, REG32|MEM) + static inline bool isREG32_REG32orMEM(const Operand& op1, const Operand& op2) + { + return op1.isREG(i32e) && ((op2.isREG(i32e) && op1.getBit() == op2.getBit()) || op2.isMEM()); + } + static inline bool isValidSSE(const Operand& op) + { + // SSE instructions do not support XMM16 - XMM31 + return !(op.isXMM() && op.getIdx() >= 16); + } + static inline uint8_t rexRXB(int bit, int bit3, const Reg& r, const Reg& b, const Reg& x = Reg()) + { + int v = bit3 ? 8 : 0; + if (r.hasIdxBit(bit)) v |= 4; + if (x.hasIdxBit(bit)) v |= 2; + if (b.hasIdxBit(bit)) v |= 1; + return uint8_t(v); + } + void rex2(int bit3, int rex4bit, const Reg& r, const Reg& b, const Reg& x = Reg()) + { + db(0xD5); + db((rexRXB(4, bit3, r, b, x) << 4) | rex4bit); + } + // return true if rex2 is selected + bool rex(const Operand& op1, const Operand& op2 = Operand(), uint64_t type = 0) + { + if (op1.getNF() | op2.getNF()) XBYAK_THROW_RET(ERR_INVALID_NF, false) + if (op1.getZU() | op2.getZU()) XBYAK_THROW_RET(ERR_INVALID_ZU, false) + uint8_t rex = 0; + const Operand *p1 = &op1, *p2 = &op2; + if (p1->isMEM()) std::swap(p1, p2); + if (p1->isMEM()) XBYAK_THROW_RET(ERR_BAD_COMBINATION, false) + // except movsx(16bit, 32/64bit) + bool p66 = (op1.isBit(16) && !op2.isBit(i32e)) || (op2.isBit(16) && !op1.isBit(i32e)); + if ((type & T_66) || p66) db(0x66); + if (type & T_F2) { + db(0xF2); + } + if (type & T_F3) { + db(0xF3); + } + bool is0F = type & T_0F; + if (p2->isMEM()) { + const Reg& r = *static_cast(p1); + const Address& addr = p2->getAddress(); + const RegExp e = addr.getRegExp(); + const Reg& base = e.getBase(); + const Reg& idx = e.getIndex(); + if (BIT == 64 && addr.is32bit()) db(0x67); + rex = rexRXB(3, r.isREG(64), r, base, idx); + if (r.hasRex2() || addr.hasRex2()) { + if (type & (T_0F38|T_0F3A)) XBYAK_THROW_RET(ERR_CANT_USE_REX2, false) + rex2(is0F, rex, r, base, idx); + return true; + } + if (rex || r.isExt8bit()) rex |= 0x40; + } else { + const Reg& r1 = static_cast(op1); + const Reg& r2 = static_cast(op2); + // ModRM(reg, base); + rex = rexRXB(3, r1.isREG(64) || r2.isREG(64), r2, r1); + if (r1.hasRex2() || r2.hasRex2()) { + if (type & (T_0F38|T_0F3A)) XBYAK_THROW_RET(ERR_CANT_USE_REX2, 0) + rex2(is0F, rex, r2, r1); + return true; + } + if (rex || r1.isExt8bit() || r2.isExt8bit()) rex |= 0x40; + } + if (rex) db(rex); + return false; + } + // @@@begin of avx_type_def.h + static const uint64_t T_NONE = 0ull; + // low 3 bit + static const uint64_t T_N1 = 1ull; + static const uint64_t T_N2 = 2ull; + static const uint64_t T_N4 = 3ull; + static const uint64_t T_N8 = 4ull; + static const uint64_t T_N16 = 5ull; + static const uint64_t T_N32 = 6ull; + static const uint64_t T_NX_MASK = 7ull; + static const uint64_t T_DUP = T_NX_MASK;//1 << 4, // N = (8, 32, 64) + static const uint64_t T_N_VL = 1ull << 3; // N * (1, 2, 4) for VL + static const uint64_t T_APX = 1ull << 4; + static const uint64_t T_66 = 1ull << 5; // pp = 1 + static const uint64_t T_F3 = 1ull << 6; // pp = 2 + static const uint64_t T_ER_R = 1ull << 7; // reg{er} + static const uint64_t T_0F = 1ull << 8; + static const uint64_t T_0F38 = 1ull << 9; + static const uint64_t T_0F3A = 1ull << 10; + static const uint64_t T_MAP5 = 1ull << 11; + static const uint64_t T_L1 = 1ull << 12; + static const uint64_t T_W0 = 1ull << 13; // T_EW0 = T_W0 + static const uint64_t T_W1 = 1ull << 14; // for VEX + static const uint64_t T_EW1 = 1ull << 16; // for EVEX + static const uint64_t T_YMM = 1ull << 17; // support YMM, ZMM + static const uint64_t T_EVEX = 1ull << 18; + static const uint64_t T_ER_X = 1ull << 19; // xmm{er} + static const uint64_t T_ER_Y = 1ull << 20; // ymm{er} + static const uint64_t T_ER_Z = 1ull << 21; // zmm{er} + static const uint64_t T_SAE_X = 1ull << 22; // xmm{sae} + static const uint64_t T_SAE_Y = 1ull << 23; // ymm{sae} + static const uint64_t T_SAE_Z = 1ull << 24; // zmm{sae} + static const uint64_t T_MUST_EVEX = 1ull << 25; // contains T_EVEX + static const uint64_t T_B32 = 1ull << 26; // m32bcst + static const uint64_t T_B64 = 1ull << 27; // m64bcst + static const uint64_t T_B16 = T_B32 | T_B64; // m16bcst (Be careful) + static const uint64_t T_M_K = 1ull << 28; // mem{k} + static const uint64_t T_VSIB = 1ull << 29; + static const uint64_t T_MEM_EVEX = 1ull << 30; // use evex if mem + static const uint64_t T_MAP6 = 1ull << 31; + static const uint64_t T_NF = 1ull << 32; // T_nf + static const uint64_t T_CODE1_IF1 = 1ull << 33; // code|=1 if !r.isBit(8) + + static const uint64_t T_ND1 = 1ull << 35; // ND=1 + static const uint64_t T_ZU = 1ull << 36; // ND=ZU + static const uint64_t T_F2 = 1ull << 37; // pp = 3 + static const uint64_t T_SENTRY = (1ull << 38)-1; // attribute(>=T_SENTRY) is for error check + static const uint64_t T_ALLOW_DIFF_SIZE = 1ull << 38; // allow difference reg size + static const uint64_t T_ALLOW_ABCDH = 1ull << 39; // allow [abcd]h reg + // T_66 = 1, T_F3 = 2, T_F2 = 3 + static inline uint32_t getPP(uint64_t type) { return (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0; } + // @@@end of avx_type_def.h + static inline uint32_t getMap(uint64_t type) + { + if (type & T_MAP6) return 6; + if (type & T_MAP5) return 5; + return (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0; + } + void vex(const Reg& reg, const Reg& base, const Operand *v, uint64_t type, int code, bool x = false) + { + int w = (type & T_W1) ? 1 : 0; + bool is256 = (type & T_L1) ? true : reg.isYMM(); + bool r = reg.isExtIdx(); + bool b = base.isExtIdx(); + int idx = v ? v->getIdx() : 0; + if ((idx | reg.getIdx() | base.getIdx()) >= 16) XBYAK_THROW(ERR_BAD_COMBINATION) + uint32_t pp = getPP(type); + uint32_t vvvv = (((~idx) & 15) << 3) | (is256 ? 4 : 0) | pp; + if (!b && !x && !w && (type & T_0F)) { + db(0xC5); db((r ? 0 : 0x80) | vvvv); + } else { + uint32_t mmmm = getMap(type); + db(0xC4); db((r ? 0 : 0x80) | (x ? 0 : 0x40) | (b ? 0 : 0x20) | mmmm); db((w << 7) | vvvv); + } + db(code); + } + void verifySAE(const Reg& r, uint64_t type) const + { + if (((type & T_SAE_X) && r.isXMM()) || ((type & T_SAE_Y) && r.isYMM()) || ((type & T_SAE_Z) && r.isZMM())) return; + XBYAK_THROW(ERR_SAE_IS_INVALID) + } + void verifyER(const Reg& r, uint64_t type) const + { + if ((type & T_ER_R) && r.isREG(32|64)) return; + if (((type & T_ER_X) && r.isXMM()) || ((type & T_ER_Y) && r.isYMM()) || ((type & T_ER_Z) && r.isZMM())) return; + XBYAK_THROW(ERR_ER_IS_INVALID) + } + // (a, b, c) contains non zero two or three values then err + int verifyDuplicate(int a, int b, int c, int err) + { + int v = a | b | c; + if ((a > 0 && a != v) + (b > 0 && b != v) + (c > 0 && c != v) > 0) XBYAK_THROW_RET(err, 0) + return v; + } + int evex(const Reg& reg, const Reg& base, const Operand *v, uint64_t type, int code, const Reg *x = 0, bool b = false, int aaa = 0, uint32_t VL = 0, bool Hi16Vidx = false) + { + if (!(type & (T_EVEX | T_MUST_EVEX))) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, 0) + int w = (type & T_EW1) ? 1 : 0; + uint32_t mmm = getMap(type); + uint32_t pp = getPP(type); + int idx = v ? v->getIdx() : 0; + uint32_t vvvv = ~idx; + + bool R = reg.isExtIdx(); + bool X3 = (x && x->isExtIdx()) || (base.isSIMD() && base.isExtIdx2()); + uint8_t B4 = (base.isREG() && base.isExtIdx2()) ? 8 : 0; + uint8_t U = (x && (x->isREG() && x->isExtIdx2())) ? 0 : 4; + bool B = base.isExtIdx(); + bool Rp = reg.isExtIdx2(); + int LL; + int rounding = verifyDuplicate(reg.getRounding(), base.getRounding(), v ? v->getRounding() : 0, ERR_ROUNDING_IS_ALREADY_SET); + int disp8N = 1; + if (rounding) { + if (rounding == EvexModifierRounding::T_SAE) { + verifySAE(base, type); LL = 0; + } else { + verifyER(base, type); LL = rounding - 1; + } + b = true; + } else { + if (v) VL = (std::max)(VL, v->getBit()); + VL = (std::max)((std::max)(reg.getBit(), base.getBit()), VL); + LL = (VL >= 512 /* tmm */) ? 2 : (VL == 256) ? 1 : 0; + if (b) { + disp8N = ((type & T_B16) == T_B16) ? 2 : (type & T_B32) ? 4 : 8; + } else if ((type & T_NX_MASK) == T_DUP) { + disp8N = VL == 128 ? 8 : VL == 256 ? 32 : 64; + } else { + if ((type & (T_NX_MASK | T_N_VL)) == 0) { + type |= T_N16 | T_N_VL; // default + } + int low = type & T_NX_MASK; + if (low > 0) { + disp8N = 1 << (low - 1); + if (type & T_N_VL) disp8N *= (VL == 512 ? 4 : VL == 256 ? 2 : 1); + } + } + } + bool V4 = ((v ? v->isExtIdx2() : 0) || Hi16Vidx); + bool z = reg.hasZero() || base.hasZero() || (v ? v->hasZero() : false); + if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET); + if (aaa == 0) z = 0; // clear T_z if mask is not set + db(0x62); + db((R ? 0 : 0x80) | (X3 ? 0 : 0x40) | (B ? 0 : 0x20) | (Rp ? 0 : 0x10) | B4 | mmm); + db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | U | (pp & 3)); + db((z ? 0x80 : 0) | ((LL & 3) << 5) | (b ? 0x10 : 0) | (V4 ? 0 : 8) | (aaa & 7)); + db(code); + return disp8N; + } + // evex of Legacy + void evexLeg(const Reg& r, const Reg& b, const Reg& x, const Reg& v, uint64_t type, int sc = NONE) + { + int M = getMap(type); if (M == 0) M = 4; // legacy + int R3 = !r.isExtIdx(); + int X3 = !x.isExtIdx(); + int B3 = b.isExtIdx() ? 0 : 0x20; + int R4 = r.isExtIdx2() ? 0 : 0x10; + int B4 = b.isExtIdx2() ? 0x08 : 0; + int w = (type & T_W0) ? 0 : (r.isBit(64) || v.isBit(64) || (type & T_W1)); + int V = (~v.getIdx() & 15) << 3; + int X4 = x.isExtIdx2() ? 0 : 0x04; + int pp = (type & (T_F2|T_F3|T_66)) ? getPP(type) : (r.isBit(16) || v.isBit(16)); + int V4 = !v.isExtIdx2(); + int ND = (type & T_ZU) ? (r.getZU() || b.getZU()) : (type & T_ND1) ? 1 : (type & T_APX) ? 0 : v.isREG(); + int NF = r.getNF() | b.getNF() | x.getNF() | v.getNF(); + int L = 0; + if ((type & T_NF) == 0 && NF) XBYAK_THROW(ERR_INVALID_NF) + if ((type & T_ZU) == 0 && r.getZU()) XBYAK_THROW(ERR_INVALID_ZU) + db(0x62); + db((R3<<7) | (X3<<6) | B3 | R4 | B4 | M); + db((w<<7) | V | X4 | pp); + if (sc != NONE) { + db((L<<5) | (ND<<4) | sc); + } else { + db((L<<5) | (ND<<4) | (V4<<3) | (NF<<2)); + } + } + void setModRM(int mod, int r1, int r2) + { + db(static_cast((mod << 6) | ((r1 & 7) << 3) | (r2 & 7))); + } + void setSIB(const Address& addr, int reg) + { + const RegExp& e = addr.getRegExp(); + const Label *label = e.getLabel(); + int disp8N = addr.disp8N; + uint64_t disp64 = e.getDisp(); +#if defined(XBYAK64) && !defined(__ILP32__) +#ifdef XBYAK_OLD_DISP_CHECK + // treat 0xffffffff as 0xffffffffffffffff + uint64_t high = disp64 >> 32; + if (high != 0 && high != 0xFFFFFFFF) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG) +#else + // displacement should be a signed 32-bit value, so also check sign bit + uint64_t high = disp64 >> 31; + if (high != 0 && high != 0x1FFFFFFFF) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG) +#endif +#endif + uint32_t disp = static_cast(disp64); + const Reg& base = e.getBase(); + const Reg& index = e.getIndex(); + const int baseIdx = base.getIdx(); + const int baseBit = base.getBit(); + const int indexBit = index.getBit(); + enum { + mod00 = 0, mod01 = 1, mod10 = 2 + }; + int mod = mod10; // disp32 + if (!baseBit || ((baseIdx & 7) != Operand::EBP && (label == 0 && disp == 0))) { + mod = mod00; + } else if (label) { + // always disp32 + } else { + if (disp8N == 0) { + if (inner::IsInDisp8(disp)) { + mod = mod01; + } + } else { + // disp must be casted to signed + uint32_t t = static_cast(static_cast(disp) / disp8N); + if ((disp % disp8N) == 0 && inner::IsInDisp8(t)) { + disp = t; + mod = mod01; + } + } + } + const int newBaseIdx = baseBit ? (baseIdx & 7) : Operand::EBP; + /* ModR/M = [2:3:3] = [Mod:reg/code:R/M] */ + bool hasSIB = indexBit || (baseIdx & 7) == Operand::ESP; +#ifdef XBYAK64 + if (!baseBit && !indexBit) hasSIB = true; +#endif + if (hasSIB) { + setModRM(mod, reg, Operand::ESP); + /* SIB = [2:3:3] = [SS:index:base(=rm)] */ + const int idx = indexBit ? (index.getIdx() & 7) : Operand::ESP; + const int scale = e.getScale(); + const int SS = (scale == 8) ? 3 : (scale == 4) ? 2 : (scale == 2) ? 1 : 0; + setModRM(SS, idx, newBaseIdx); + } else { + setModRM(mod, reg, newBaseIdx); + } + if (mod == mod01) { + db(disp); + } else if (mod == mod10 || (mod == mod00 && !baseBit)) { + if (label) { + putL_inner(*label, false, e.getDisp() - addr.immSize, 4); + } else { + dd(disp); + } + } + } + LabelManager labelMgr_; + void writeCode(uint64_t type, const Reg& r, int code, bool rex2 = false) + { + if (!(type&T_APX || rex2)) { + if (type & T_0F) { + db(0x0F); + } else if (type & T_0F38) { + db(0x0F); db(0x38); + } else if (type & T_0F3A) { + db(0x0F); db(0x3A); + } + } + db(code | (((type & T_SENTRY) == 0 || (type & T_CODE1_IF1)) && !r.isBit(8))); + } + void opRR(const Reg& r1, const Reg& r2, uint64_t type, int code) + { + if (!(type & T_ALLOW_DIFF_SIZE) && r1.isREG() && r2.isREG() && r1.getBit() != r2.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) + if (!(type & T_ALLOW_ABCDH) && (isBadCombination(r1, r2) || isBadCombination(r2, r1))) XBYAK_THROW(ERR_CANT_USE_ABCDH) + bool rex2 = rex(r2, r1, type); + writeCode(type, r1, code, rex2); + setModRM(3, r1.getIdx(), r2.getIdx()); + } + void opMR(const Address& addr, const Reg& r, uint64_t type, int code, uint64_t type2 = 0, int code2 = NONE) + { + if (code2 == NONE) code2 = code; + if (type2 && opROO(Reg(), addr, r, type2, code2)) return; + if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP) +#if XBYAK_STRICT_CHECK_MEM_REG_SIZE == 1 + if (!(type & T_ALLOW_DIFF_SIZE) && r.getBit() <= BIT && addr.getBit() > 0 && addr.getBit() != r.getBit()) XBYAK_THROW(ERR_BAD_MEM_SIZE) +#endif + bool rex2 = rex(addr, r, type); + writeCode(type, r, code, rex2); + opAddr(addr, r.getIdx()); + } + void opLoadSeg(const Address& addr, const Reg& reg, uint64_t type, int code) + { + if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) + if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP) + // can't use opMR + rex(addr, reg, type); + if (type & T_0F) db(0x0F); + db(code); + opAddr(addr, reg.getIdx()); + } + // for only MPX(bnd*) + void opMIB(const Address& addr, const Reg& reg, uint64_t type, int code) + { + if (addr.getMode() != inner::M_ModRM) XBYAK_THROW(ERR_INVALID_MIB_ADDRESS) + opMR(addr.cloneNoOptimize(), reg, type, code); + } + void makeJmp(uint32_t disp, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref) + { + const int shortJmpSize = 2; + const int longHeaderSize = longPref ? 2 : 1; + const int longJmpSize = longHeaderSize + 4; + if (type != T_NEAR && inner::IsInDisp8(disp - shortJmpSize)) { + db(shortCode); db(disp - shortJmpSize); + } else { + if (type == T_SHORT) XBYAK_THROW(ERR_LABEL_IS_TOO_FAR) + if (longPref) db(longPref); + db(longCode); dd(disp - longJmpSize); + } + } + bool isNEAR(LabelType type) const { return type == T_NEAR || (type == T_AUTO && isDefaultJmpNEAR_); } + template + void opJmp(T& label, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref) + { + if (type == T_FAR) XBYAK_THROW(ERR_NOT_SUPPORTED) + if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory(); /* avoid splitting code of jmp */ + size_t offset = 0; + if (labelMgr_.getOffset(&offset, label)) { /* label exists */ + makeJmp(inner::VerifyInInt32(offset - size_), type, shortCode, longCode, longPref); + } else { + int jmpSize = 0; + if (isNEAR(type)) { + jmpSize = 4; + if (longPref) db(longPref); + db(longCode); dd(0); + } else { + jmpSize = 1; + db(shortCode); db(0); + } + JmpLabel jmp(size_, jmpSize, inner::LasIs); + labelMgr_.addUndefinedLabel(label, jmp); + } + } + void opJmpAbs(const void *addr, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref = 0) + { + if (type == T_FAR) XBYAK_THROW(ERR_NOT_SUPPORTED) + if (isAutoGrow()) { + if (!isNEAR(type)) XBYAK_THROW(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW) + if (size_ + 16 >= maxSize_) growMemory(); + if (longPref) db(longPref); + db(longCode); + dd(0); + save(size_ - 4, size_t(addr) - size_, 4, inner::Labs); + } else { + makeJmp(inner::VerifyInInt32(reinterpret_cast(addr) - getCurr()), type, shortCode, longCode, longPref); + } + + } + void opJmpOp(const Operand& op, LabelType type, int ext) + { + const int bit = 16|i32e; + if (type == T_FAR) { + if (!op.isMEM(bit)) XBYAK_THROW(ERR_NOT_SUPPORTED) + opRext(op, bit, ext + 1, 0, 0xFF, false); + } else { + opRext(op, bit, ext, 0, 0xFF, true); + } + } + // reg is reg field of ModRM + // immSize is the size for immediate value + void opAddr(const Address &addr, int reg) + { + if (!addr.permitVsib && addr.isVsib()) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) + if (addr.getMode() == inner::M_ModRM) { + setSIB(addr, reg); + } else if (addr.getMode() == inner::M_rip || addr.getMode() == inner::M_ripAddr) { + setModRM(0, reg, 5); + if (addr.getLabel()) { // [rip + Label] + putL_inner(*addr.getLabel(), true, addr.getDisp() - addr.immSize, 4); + } else { + size_t disp = addr.getDisp(); + if (addr.getMode() == inner::M_ripAddr) { + if (isAutoGrow()) XBYAK_THROW(ERR_INVALID_RIP_IN_AUTO_GROW) + disp -= (size_t)getCurr() + 4 + addr.immSize; + } + dd(inner::VerifyInInt32(disp)); + } + } + } + void opSSE(const Reg& r, const Operand& op, uint64_t type, int code, bool isValid(const Operand&, const Operand&) = 0, int imm8 = NONE) + { + if (isValid && !isValid(r, op)) XBYAK_THROW(ERR_BAD_COMBINATION) + if (!isValidSSE(r) || !isValidSSE(op)) XBYAK_THROW(ERR_NOT_SUPPORTED) + opRO(r, op, type, code, true, (imm8 != NONE) ? 1 : 0); + if (imm8 != NONE) db(imm8); + } + void opMMX_IMM(const Mmx& mmx, int imm8, int code, int ext) + { + if (!isValidSSE(mmx)) XBYAK_THROW(ERR_NOT_SUPPORTED) + uint64_t type = T_0F; + if (mmx.isXMM()) type |= T_66; + opRR(Reg32(ext), mmx, type, code); + db(imm8); + } + void opMMX(const Mmx& mmx, const Operand& op, int code, uint64_t type = T_0F, uint64_t pref = T_66, int imm8 = NONE) + { + if (mmx.isXMM()) type |= pref; + opSSE(mmx, op, type, code, isXMMorMMX_MEM, imm8); + } + void opMovXMM(const Operand& op1, const Operand& op2, uint64_t type, int code) + { + if (!isValidSSE(op1) || !isValidSSE(op2)) XBYAK_THROW(ERR_NOT_SUPPORTED) + if (op1.isXMM() && op2.isMEM()) { + opMR(op2.getAddress(), op1.getReg(), type, code); + } else if (op1.isMEM() && op2.isXMM()) { + opMR(op1.getAddress(), op2.getReg(), type, code | 1); + } else { + XBYAK_THROW(ERR_BAD_COMBINATION) + } + } + // pextr{w,b,d}, extractps + void opExt(const Operand& op, const Mmx& mmx, int code, int imm, bool hasMMX2 = false) + { + if (!isValidSSE(op) || !isValidSSE(mmx)) XBYAK_THROW(ERR_NOT_SUPPORTED) + if (hasMMX2 && op.isREG(i32e)) { /* pextrw is special */ + if (mmx.isXMM()) db(0x66); + opRR(op.getReg(), mmx, T_0F, 0xC5); db(imm); + } else { + opSSE(mmx, op, T_66 | T_0F3A, code, isXMM_REG32orMEM, imm); + } + } + // r1 is [abcd]h and r2 is reg with rex + bool isBadCombination(const Reg& r1, const Reg& r2) const + { + if (!r1.isHigh8bit()) return false; + if (r2.isExt8bit() || r2.getIdx() >= 8) return true; + return false; + } + // (r, r, m) or (r, m, r) + bool opROO(const Reg& d, const Operand& op1, const Operand& op2, uint64_t type, int code, int immSize = 0, int sc = NONE) + { + if (!(type & T_MUST_EVEX) && !d.isREG() && !(d.hasRex2NFZU() || op1.hasRex2NFZU() || op2.hasRex2NFZU())) return false; + const Operand *p1 = &op1, *p2 = &op2; + if (p1->isMEM()) { std::swap(p1, p2); } else { if (p2->isMEM()) code |= 2; } + if (p1->isMEM()) XBYAK_THROW_RET(ERR_BAD_COMBINATION, false) + if (p2->isMEM()) { + const Reg& r = *static_cast(p1); + Address addr = p2->getAddress(); + const RegExp e = addr.getRegExp(); + evexLeg(r, e.getBase(), e.getIndex(), d, type, sc); + writeCode(type, d, code); + addr.immSize = immSize; + opAddr(addr, r.getIdx()); + } else { + evexLeg(static_cast(op2), static_cast(op1), Reg(), d, type, sc); + writeCode(type, d, code); + setModRM(3, op2.getIdx(), op1.getIdx()); + } + return true; + } + void opRext(const Operand& op, int bit, int ext, uint64_t type, int code, bool disableRex = false, int immSize = 0, const Reg *d = 0) + { + int opBit = op.getBit(); + if (disableRex && opBit == 64) opBit = 32; + const Reg r(ext, Operand::REG, opBit); + if ((type & T_APX) && (d != 0 || op.hasRex2NFZU()) && opROO(d ? *d : Reg(0, Operand::REG, opBit), op, r, type, code)) return; + if (op.isMEM()) { + opMR(op.getAddress(immSize), r, type, code); + } else if (op.isREG(bit)) { + opRR(r, op.getReg().changeBit(opBit), type | T_ALLOW_ABCDH, code); + } else { + XBYAK_THROW(ERR_BAD_COMBINATION) + } + } + void opSetCC(const Operand& op, int ext) + { + if (opROO(Reg(), op, Reg(), T_APX|T_ZU|T_F2, 0x40 | ext)) return; + opRext(op, 8, 0, T_0F, 0x90 | ext); + } + void opShift(const Operand& op, int imm, int ext, const Reg *d = 0) + { + if (d == 0) verifyMemHasSize(op); + if (d && op.getBit() != 0 && d->getBit() != op.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) + uint64_t type = T_APX|T_CODE1_IF1; if (ext & 8) type |= T_NF; if (d) type |= T_ND1; + opRext(op, 0, ext&7, type, (0xC0 | ((imm == 1 ? 1 : 0) << 4)), false, (imm != 1) ? 1 : 0, d); + if (imm != 1) db(imm); + } + void opShift(const Operand& op, const Reg8& _cl, int ext, const Reg *d = 0) + { + if (_cl.getIdx() != Operand::CL) XBYAK_THROW(ERR_BAD_COMBINATION) + if (d && op.getBit() != 0 && d->getBit() != op.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) + uint64_t type = T_APX|T_CODE1_IF1; if (ext & 8) type |= T_NF; if (d) type |= T_ND1; + opRext(op, 0, ext&7, type, 0xD2, false, 0, d); + } + // condR assumes that op.isREG() is true + void opRO(const Reg& r, const Operand& op, uint64_t type, int code, bool condR = true, int immSize = 0) + { + if (op.isMEM()) { + opMR(op.getAddress(immSize), r, type, code); + } else if (condR) { + opRR(r, op.getReg(), type, code); + } else { + XBYAK_THROW(ERR_BAD_COMBINATION) + } + } + void opShxd(const Reg& d, const Operand& op, const Reg& reg, uint8_t imm, int code, int code2, const Reg8 *_cl = 0) + { + if (_cl && _cl->getIdx() != Operand::CL) XBYAK_THROW(ERR_BAD_COMBINATION) + if (!reg.isREG(16|i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) + int immSize = _cl ? 0 : 1; + if (_cl) code |= 1; + uint64_t type = T_APX | T_NF; + if (d.isREG()) type |= T_ND1; + if (!opROO(d, op, reg, type, _cl ? code : code2, immSize)) { + opRO(reg, op, T_0F, code, true, immSize); + } + if (!_cl) db(imm); + } + // (REG, REG|MEM), (MEM, REG) + void opRO_MR(const Operand& op1, const Operand& op2, int code) + { + if (op2.isMEM()) { + if (!op1.isREG()) XBYAK_THROW(ERR_BAD_COMBINATION) + opMR(op2.getAddress(), op1.getReg(), 0, code | 2); + } else { + opRO(static_cast(op2), op1, 0, code, op1.getKind() == op2.getKind()); + } + } + bool isInDisp16(uint32_t x) const { return 0xFFFF8000 <= x || x <= 0x7FFF; } + // allow add(ax, 0x8000); + bool isInDisp16relaxed(uint32_t x) const { uint32_t v = x & 0xffff0000; return v == 0 || v == 0xffff0000; } + uint32_t getImmBit(const Operand& op, uint32_t imm) + { + verifyMemHasSize(op); + uint32_t immBit = inner::IsInDisp8(imm) ? 8 : isInDisp16relaxed(imm) ? 16 : 32; + if (op.isBit(8)) immBit = 8; + if (op.getBit() < immBit) XBYAK_THROW_RET(ERR_IMM_IS_TOO_BIG, 0) + if (op.isBit(32|64) && immBit == 16) immBit = 32; /* don't use MEM16 if 32/64bit mode */ + return immBit; + } + // (REG|MEM, IMM) + void opOI(const Operand& op, uint32_t imm, int code, int ext) + { + uint32_t immBit = getImmBit(op, imm); + if (op.isREG() && op.getIdx() == 0 && (op.getBit() == immBit || (op.isBit(64) && immBit == 32))) { // rax, eax, ax, al + rex(op); + db(code | 4 | (immBit == 8 ? 0 : 1)); + } else { + int tmp = immBit < (std::min)(op.getBit(), 32U) ? 2 : 0; + opRext(op, 0, ext, 0, 0x80 | tmp, false, immBit / 8); + } + db(imm, immBit / 8); + } + // (r, r/m, imm) + void opROI(const Reg& d, const Operand& op, uint32_t imm, uint64_t type, int ext) + { + uint32_t immBit = getImmBit(d, imm); + int code = immBit < (std::min)(d.getBit(), 32U) ? 2 : 0; + opROO(d, op, Reg(ext, Operand::REG, d.getBit()), type, 0x80 | code, immBit / 8); + db(imm, immBit / 8); + } + void opIncDec(const Reg& d, const Operand& op, int ext) + { +#ifdef XBYAK64 + if (d.isREG()) { + int code = d.isBit(8) ? 0xFE : 0xFF; + uint64_t type = T_APX|T_NF|T_ND1; + if (d.isBit(16)) type |= T_66; + opROO(d, op, Reg(ext, Operand::REG, d.getBit()), type, code); + return; + } +#else + (void)d; +#endif + verifyMemHasSize(op); +#ifndef XBYAK64 + if (op.isREG() && !op.isBit(8)) { + rex(op); db((ext ? 0x48 : 0x40) | op.getIdx()); + return; + } +#endif + opRext(op, op.getBit(), ext, 0, 0xFE); + } + void opPushPop(const Operand& op, int code, int ext, int alt) + { + if (op.isREG() && op.hasRex2()) { + const Reg& r = static_cast(op); + rex2(0, rexRXB(3, 0, Reg(), r), Reg(), r); + db(alt | (r.getIdx() & 7)); + return; + } + int bit = op.getBit(); + if (bit == 16 || bit == BIT) { + if (bit == 16) db(0x66); + if (op.isREG()) { + if (op.getReg().getIdx() >= 8) db(0x41); + db(alt | (op.getIdx() & 7)); + return; + } + if (op.isMEM()) { + opMR(op.getAddress(), Reg(ext, Operand::REG, 32), T_ALLOW_DIFF_SIZE, code); + return; + } + } + XBYAK_THROW(ERR_BAD_COMBINATION) + } + void verifyMemHasSize(const Operand& op) const + { + if (op.isMEM() && op.getBit() == 0) XBYAK_THROW(ERR_MEM_SIZE_IS_NOT_SPECIFIED) + } + /* + mov(r, imm) = db(imm, mov_imm(r, imm)) + */ + int mov_imm(const Reg& reg, uint64_t imm) + { + int bit = reg.getBit(); + const int idx = reg.getIdx(); + int code = 0xB0 | ((bit == 8 ? 0 : 1) << 3); + if (bit == 64 && (imm & ~uint64_t(0xffffffffu)) == 0) { + rex(Reg32(idx)); + bit = 32; + } else { + rex(reg); + if (bit == 64 && inner::IsInInt32(imm)) { + db(0xC7); + code = 0xC0; + bit = 32; + } + } + db(code | (idx & 7)); + return bit / 8; + } + template + void putL_inner(T& label, bool relative = false, size_t disp = 0, int jmpSize = (int)sizeof(size_t)) + { + if (relative) jmpSize = 4; + if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory(); + size_t offset = 0; + if (labelMgr_.getOffset(&offset, label)) { + if (relative) { + db(inner::VerifyInInt32(offset + disp - size_ - jmpSize), jmpSize); + } else if (isAutoGrow()) { + db(uint64_t(0), jmpSize); + save(size_ - jmpSize, offset, jmpSize, inner::LaddTop); + } else { + db(size_t(top_) + offset, jmpSize); + } + return; + } + db(uint64_t(0), jmpSize); + JmpLabel jmp(size_, jmpSize, (relative ? inner::LasIs : isAutoGrow() ? inner::LaddTop : inner::Labs), disp); + labelMgr_.addUndefinedLabel(label, jmp); + } + void opMovxx(const Reg& reg, const Operand& op, uint8_t code) + { + if (op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) + int w = op.isBit(16); + if (!(reg.isREG() && (reg.getBit() > op.getBit()))) XBYAK_THROW(ERR_BAD_COMBINATION) + opRO(reg, op, T_0F | T_ALLOW_DIFF_SIZE, code | w); + } + void opFpuMem(const Address& addr, uint8_t m16, uint8_t m32, uint8_t m64, uint8_t ext, uint8_t m64ext) + { + if (addr.is64bitDisp()) XBYAK_THROW(ERR_CANT_USE_64BIT_DISP) + uint8_t code = addr.isBit(16) ? m16 : addr.isBit(32) ? m32 : addr.isBit(64) ? m64 : 0; + if (!code) XBYAK_THROW(ERR_BAD_MEM_SIZE) + if (m64ext && addr.isBit(64)) ext = m64ext; + rex(addr, st0); + db(code); + opAddr(addr, ext); + } + // use code1 if reg1 == st0 + // use code2 if reg1 != st0 && reg2 == st0 + void opFpuFpu(const Fpu& reg1, const Fpu& reg2, uint32_t code1, uint32_t code2) + { + uint32_t code = reg1.getIdx() == 0 ? code1 : reg2.getIdx() == 0 ? code2 : 0; + if (!code) XBYAK_THROW(ERR_BAD_ST_COMBINATION) + db(uint8_t(code >> 8)); + db(uint8_t(code | (reg1.getIdx() | reg2.getIdx()))); + } + void opFpu(const Fpu& reg, uint8_t code1, uint8_t code2) + { + db(code1); db(code2 | reg.getIdx()); + } + void opVex(const Reg& r, const Operand *p1, const Operand& op2, uint64_t type, int code, int imm8 = NONE) + { + if (op2.isMEM()) { + Address addr = op2.getAddress(); + const RegExp& regExp = addr.getRegExp(); + const Reg& base = regExp.getBase(); + const Reg& index = regExp.getIndex(); + if (BIT == 64 && addr.is32bit()) db(0x67); + int disp8N = 0; + if ((type & (T_MUST_EVEX|T_MEM_EVEX)) || r.hasEvex() || (p1 && p1->hasEvex()) || addr.isBroadcast() || addr.getOpmaskIdx() || addr.hasRex2()) { + int aaa = addr.getOpmaskIdx(); + if (aaa && !(type & T_M_K)) XBYAK_THROW(ERR_INVALID_OPMASK_WITH_MEMORY) + bool b = false; + if (addr.isBroadcast()) { + if (!(type & (T_B32 | T_B64))) XBYAK_THROW(ERR_INVALID_BROADCAST) + b = true; + } + int VL = regExp.isVsib() ? index.getBit() : 0; + disp8N = evex(r, base, p1, type, code, &index, b, aaa, VL, index.isSIMD() && index.isExtIdx2()); + } else { + vex(r, base, p1, type, code, index.isExtIdx()); + } + if (type & T_VSIB) addr.permitVsib = true; + if (disp8N) addr.disp8N = disp8N; + if (imm8 != NONE) addr.immSize = 1; + opAddr(addr, r.getIdx()); + } else { + const Reg& base = op2.getReg(); + if ((type & T_MUST_EVEX) || r.hasEvex() || (p1 && p1->hasEvex()) || base.hasEvex()) { + evex(r, base, p1, type, code); + } else { + vex(r, base, p1, type, code); + } + setModRM(3, r.getIdx(), base.getIdx()); + } + if (imm8 != NONE) db(imm8); + } + // (r, r, r/m) + // opRRO(a, b, c) == opROO(b, c, a) + void opRRO(const Reg& d, const Reg& r1, const Operand& op2, uint64_t type, uint8_t code, int imm8 = NONE) + { + const unsigned int bit = d.getBit(); + if (r1.getBit() != bit || (op2.isREG() && op2.getBit() != bit)) XBYAK_THROW(ERR_BAD_COMBINATION) + type |= (bit == 64) ? T_W1 : T_W0; + if (d.hasRex2() || r1.hasRex2() || op2.hasRex2() || d.getNF()) { + opROO(r1, op2, d, type, code); + if (imm8 != NONE) db(imm8); + } else { + opVex(d, &r1, op2, type, code, imm8); + } + } + void opAVX_X_X_XM(const Xmm& x1, const Operand& op1, const Operand& op2, uint64_t type, int code, int imm8 = NONE) + { + const Xmm *x2 = static_cast(&op1); + const Operand *op = &op2; + if (op2.isNone()) { // (x1, op1) -> (x1, x1, op1) + x2 = &x1; + op = &op1; + } + // (x1, x2, op) + if (!((x1.isXMM() && x2->isXMM()) || ((type & T_YMM) && ((x1.isYMM() && x2->isYMM()) || (x1.isZMM() && x2->isZMM()))))) XBYAK_THROW(ERR_BAD_COMBINATION) + opVex(x1, x2, *op, type, code, imm8); + } + void opAVX_K_X_XM(const Opmask& k, const Xmm& x2, const Operand& op3, uint64_t type, int code, int imm8 = NONE) + { + if (!op3.isMEM() && (x2.getKind() != op3.getKind())) XBYAK_THROW(ERR_BAD_COMBINATION) + opVex(k, &x2, op3, type, code, imm8); + } + // (x, x/m), (y, x/m256), (z, y/m) + void checkCvt1(const Operand& x, const Operand& op) const + { + if (!op.isMEM() && !(x.is(Operand::XMM | Operand::YMM) && op.isXMM()) && !(x.isZMM() && op.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) + } + // (x, x/m), (x, y/m256), (y, z/m) + void checkCvt2(const Xmm& x, const Operand& op) const + { + if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION) + } + void opCvt(const Xmm& x, const Operand& op, uint64_t type, int code) + { + Operand::Kind kind = x.isXMM() ? (op.isBit(256) ? Operand::YMM : Operand::XMM) : Operand::ZMM; + opVex(x.copyAndSetKind(kind), &xm0, op, type, code); + } + void opCvt2(const Xmm& x, const Operand& op, uint64_t type, int code) + { + checkCvt2(x, op); + opCvt(x, op, type, code); + } + void opCvt3(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, uint64_t type64, uint64_t type32, uint8_t code) + { + if (!(x1.isXMM() && x2.isXMM() && (op.isREG(i32e) || op.isMEM()))) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) + Xmm x(op.getIdx()); + const Operand *p = op.isREG() ? &x : &op; + opVex(x1, &x2, *p, type | (op.isBit(64) ? type64 : type32), code); + } + // (x, x/y/xword/yword), (y, z/m) + void checkCvt4(const Xmm& x, const Operand& op) const + { + if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM) && op.isBit(128|256)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION) + } + // (x, x/y/z/xword/yword/zword) + void opCvt5(const Xmm& x, const Operand& op, uint64_t type, int code) + { + if (!(x.isXMM() && op.isBit(128|256|512))) XBYAK_THROW(ERR_BAD_COMBINATION) + Operand::Kind kind = op.isBit(128) ? Operand::XMM : op.isBit(256) ? Operand::YMM : Operand::ZMM; + opVex(x.copyAndSetKind(kind), &xm0, op, type, code); + } + // (x, x, x/m), (x, y, y/m), (y, z, z/m) + void opCvt6(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code) + { + int b1 = x1.getBit(); + int b2 = x2.getBit(); + int b3 = op.getBit(); + if ((b1 == 128 && (b2 == 128 || b2 == 256) && (b2 == b3 || op.isMEM())) || (b1 == 256 && b2 == 512 && (b3 == b2 || op.isMEM()))) { + opVex(x1, &x2, op, type, code); + return; + } + XBYAK_THROW(ERR_BAD_COMBINATION); + } + const Xmm& cvtIdx0(const Operand& x) const + { + return x.isZMM() ? zm0 : x.isYMM() ? ym0 : xm0; + } + // support (x, x/m, imm), (y, y/m, imm) + void opAVX_X_XM_IMM(const Xmm& x, const Operand& op, uint64_t type, int code, int imm8 = NONE) + { + opAVX_X_X_XM(x, cvtIdx0(x), op, type, code, imm8); + } + void opCnt(const Reg& reg, const Operand& op, uint8_t code) + { + if (reg.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) + bool is16bit = reg.isREG(16) && (op.isREG(16) || op.isMEM()); + if (!is16bit && !(reg.isREG(i32e) && (op.isREG(reg.getBit()) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) + opRO(reg, op, T_F3 | T_0F, code); + } + void opGather(const Xmm& x1, const Address& addr, const Xmm& x2, uint64_t type, uint8_t code, int mode) + { + const RegExp& regExp = addr.getRegExp(); + if (!regExp.isVsib(128 | 256)) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) + const int y_vx_y = 0; + const int y_vy_y = 1; +// const int x_vy_x = 2; + const bool isAddrYMM = regExp.getIndex().getBit() == 256; + if (!x1.isXMM() || isAddrYMM || !x2.isXMM()) { + bool isOK = false; + if (mode == y_vx_y) { + isOK = x1.isYMM() && !isAddrYMM && x2.isYMM(); + } else if (mode == y_vy_y) { + isOK = x1.isYMM() && isAddrYMM && x2.isYMM(); + } else { // x_vy_x + isOK = !x1.isYMM() && isAddrYMM && !x2.isYMM(); + } + if (!isOK) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) + } + int i1 = x1.getIdx(); + int i2 = regExp.getIndex().getIdx(); + int i3 = x2.getIdx(); + if (i1 == i2 || i1 == i3 || i2 == i3) XBYAK_THROW(ERR_SAME_REGS_ARE_INVALID); + opAVX_X_X_XM(isAddrYMM ? Ymm(i1) : x1, isAddrYMM ? Ymm(i3) : x2, addr, type, code); + } + enum { + xx_yy_zz = 0, + xx_yx_zy = 1, + xx_xy_yz = 2 + }; + void checkGather2(const Xmm& x1, const Reg& x2, int mode) const + { + if (x1.isXMM() && x2.isXMM()) return; + switch (mode) { + case xx_yy_zz: if ((x1.isYMM() && x2.isYMM()) || (x1.isZMM() && x2.isZMM())) return; + break; + case xx_yx_zy: if ((x1.isYMM() && x2.isXMM()) || (x1.isZMM() && x2.isYMM())) return; + break; + case xx_xy_yz: if ((x1.isXMM() && x2.isYMM()) || (x1.isYMM() && x2.isZMM())) return; + break; + } + XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) + } + void opGather2(const Xmm& x, const Address& addr, uint64_t type, uint8_t code, int mode) + { + if (x.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO) + const RegExp& regExp = addr.getRegExp(); + checkGather2(x, regExp.getIndex(), mode); + int maskIdx = x.getOpmaskIdx(); + if ((type & T_M_K) && addr.getOpmaskIdx()) maskIdx = addr.getOpmaskIdx(); + if (maskIdx == 0) XBYAK_THROW(ERR_K0_IS_INVALID); + if (!(type & T_M_K) && x.getIdx() == regExp.getIndex().getIdx()) XBYAK_THROW(ERR_SAME_REGS_ARE_INVALID); + opVex(x, 0, addr, type, code); + } + /* + xx_xy_yz ; mode = true + xx_xy_xz ; mode = false + */ + void opVmov(const Operand& op, const Xmm& x, uint64_t type, uint8_t code, bool mode) + { + if (mode) { + if (!op.isMEM() && !((op.isXMM() && x.isXMM()) || (op.isXMM() && x.isYMM()) || (op.isYMM() && x.isZMM()))) XBYAK_THROW(ERR_BAD_COMBINATION) + } else { + if (!op.isMEM() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) + } + opVex(x, 0, op, type, code); + } + void opGatherFetch(const Address& addr, const Xmm& x, uint64_t type, uint8_t code, Operand::Kind kind) + { + if (addr.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO) + if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) + opVex(x, 0, addr, type, code); + } + void opEncoding(const Xmm& x1, const Xmm& x2, const Operand& op, uint64_t type, int code, PreferredEncoding enc, int imm = NONE, uint64_t typeVex = 0, uint64_t typeEvex = 0, int sel = 0) + { + opAVX_X_X_XM(x1, x2, op, type | orEvexIf(enc, typeVex, typeEvex, sel), code, imm); + } + PreferredEncoding getEncoding(PreferredEncoding enc, int sel) const + { + if (enc == DefaultEncoding) { + enc = defaultEncoding_[sel]; + } + if ((sel == 0 && enc != VexEncoding && enc != EvexEncoding) || (sel == 1 && enc != PreAVX10v2Encoding && enc != AVX10v2Encoding)) XBYAK_THROW_RET(ERR_BAD_ENCODING_MODE, VexEncoding) +#ifdef XBYAK_DISABLE_AVX512 + if (enc == EvexEncoding || enc == AVX10v2Encoding) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, VexEncoding) +#endif + return enc; + } + uint64_t orEvexIf(PreferredEncoding enc, uint64_t typeVex, uint64_t typeEvex, int sel) { + enc = getEncoding(enc, sel); + return ((sel == 0 && enc == VexEncoding) || (sel == 1 && enc != AVX10v2Encoding)) ? typeVex : (T_MUST_EVEX | typeEvex); + } + void opInOut(const Reg& a, const Reg& d, uint8_t code) + { + if (a.getIdx() == Operand::AL && d.getIdx() == Operand::DX && d.getBit() == 16) { + switch (a.getBit()) { + case 8: db(code); return; + case 16: db(0x66); db(code + 1); return; + case 32: db(code + 1); return; + } + } + XBYAK_THROW(ERR_BAD_COMBINATION) + } + void opInOut(const Reg& a, uint8_t code, uint8_t v) + { + if (a.getIdx() == Operand::AL) { + switch (a.getBit()) { + case 8: db(code); db(v); return; + case 16: db(0x66); db(code + 1); db(v); return; + case 32: db(code + 1); db(v); return; + } + } + XBYAK_THROW(ERR_BAD_COMBINATION) + } + void opCcmp(const Operand& op1, const Operand& op2, int dfv, int code, int sc) // cmp = 0x38, test = 0x84 + { + if (dfv < 0 || 15 < dfv) XBYAK_THROW(ERR_INVALID_DFV) + opROO(Reg(15 - dfv, Operand::REG, (op1.getBit() | op2.getBit())), op1, op2, T_APX|T_CODE1_IF1, code, 0, sc); + } + void opCcmpi(const Operand& op, int imm, int dfv, int sc) + { + if (dfv < 0 || 15 < dfv) XBYAK_THROW(ERR_INVALID_DFV) + uint32_t immBit = getImmBit(op, imm); + uint32_t opBit = op.getBit(); + int tmp = immBit < (std::min)(opBit, 32U) ? 2 : 0; + opROO(Reg(15 - dfv, Operand::REG, opBit), op, Reg(15, Operand::REG, opBit), T_APX|T_CODE1_IF1, 0x80 | tmp, immBit / 8, sc); + db(imm, immBit / 8); + } + void opTesti(const Operand& op, int imm, int dfv, int sc) + { + if (dfv < 0 || 15 < dfv) XBYAK_THROW(ERR_INVALID_DFV) + uint32_t opBit = op.getBit(); + if (opBit == 0) XBYAK_THROW(ERR_MEM_SIZE_IS_NOT_SPECIFIED); + int immBit = (std::min)(opBit, 32U); + opROO(Reg(15 - dfv, Operand::REG, opBit), op, Reg(0, Operand::REG, opBit), T_APX|T_CODE1_IF1, 0xF6, immBit / 8, sc); + db(imm, immBit / 8); + } + void opCfcmov(const Reg& d, const Operand& op1, const Operand& op2, int code) + { + const int dBit = d.getBit(); + const int op2Bit = op2.getBit(); + if (dBit > 0 && op2Bit > 0 && dBit != op2Bit) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) + if (op1.isBit(8) || op2Bit == 8) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) + if (op2.isMEM()) { + if (op1.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) + uint64_t type = dBit > 0 ? (T_MUST_EVEX|T_NF) : T_MUST_EVEX; + opROO(d, op2, op1, type, code); + } else { + opROO(d, op1, static_cast(op2)|T_nf, T_MUST_EVEX|T_NF, code); + } + } +#ifdef XBYAK64 + void opAMX(const Tmm& t1, const Address& addr, uint64_t type, int code) + { + Address addr2 = addr.cloneNoOptimize(); + // require both base and index for all but opcode 0x49 (ldtilecfg/sttilecfg) + if (code != 0x49) { + const RegExp exp = addr2.getRegExp(); + if (exp.getBase().getBit() == 0 || exp.getIndex().getBit() == 0) XBYAK_THROW(ERR_NOT_SUPPORTED) + } + if (opROO(Reg(), addr2, t1, T_APX|type, code)) return; + opVex(t1, &tmm0, addr2, type, code); + } +#endif + // (reg32e/mem, k) if rev else (k, k/mem/reg32e) + // size = 8, 16, 32, 64 + void opKmov(const Opmask& k, const Operand& op, bool rev, int size) + { + int code = 0; + bool isReg = op.isREG(size < 64 ? 32 : 64); + if (rev) { + code = isReg ? 0x93 : op.isMEM() ? 0x91 : 0; + } else { + code = op.isOPMASK() || op.isMEM() ? 0x90 : isReg ? 0x92 : 0; + } + if (code == 0) XBYAK_THROW(ERR_BAD_COMBINATION) + uint64_t type = T_0F; + switch (size) { + case 8: type |= T_W0|T_66; break; + case 16: type |= T_W0; break; + case 32: type |= isReg ? T_W0|T_F2 : T_W1|T_66; break; + case 64: type |= isReg ? T_W1|T_F2 : T_W1; break; + } + const Operand *p1 = &k, *p2 = &op; + if (code == 0x93) { std::swap(p1, p2); } + if (opROO(Reg(), *p2, *p1, T_APX|type, code)) return; + opVex(static_cast(*p1), 0, *p2, type, code); + } + void opEncodeKey(const Reg32& r1, const Reg32& r2, uint8_t code1, uint8_t code2) + { + if (r1.getIdx() < 8 && r2.getIdx() < 8) { + db(0xF3); db(0x0F); db(0x38); db(code1); setModRM(3, r1.getIdx(), r2.getIdx()); + return; + } + opROO(Reg(), r2, r1, T_MUST_EVEX|T_F3, code2); + } + void opSSE_APX(const Xmm& x, const Operand& op, uint64_t type1, uint8_t code1, uint64_t type2, uint8_t code2, int imm = NONE) + { + if (x.getIdx() <= 15 && op.hasRex2() && opROO(Reg(), op, x, type2, code2, imm != NONE ? 1 : 0)) { + if (imm != NONE) db(imm); + return; + } + opSSE(x, op, type1, code1, isXMM_XMMorMEM, imm); + } + // AVX10 zero-extending for vmovd, vmovw + void opAVX10ZeroExt(const Operand& op1, const Operand& op2, const uint64_t typeTbl[4], const int codeTbl[4], PreferredEncoding enc, int bit) + { + const Operand *p1 = &op1; + const Operand *p2 = &op2; + bool rev = false; + if (p1->isMEM()) { + std::swap(p1, p2); + rev = true; + } + if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) + if (p1->isXMM()) { + std::swap(p1, p2); + rev = !rev; + } + enc = getEncoding(enc, 1); + int sel = -1; + if (p1->isXMM() || (p1->isMEM() && enc == AVX10v2Encoding)) { + sel = 2 + int(rev); + } else if (p1->isREG(bit) || p1->isMEM()) { + sel = int(rev); + } + if (sel == -1) XBYAK_THROW(ERR_BAD_COMBINATION) + opAVX_X_X_XM(*static_cast(p2), xm0, *p1, typeTbl[sel], codeTbl[sel]); + } +public: + unsigned int getVersion() const { return VERSION; } + using CodeArray::db; + const Mmx mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7; + const Xmm xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; + const Ymm ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7; + const Zmm zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7; + const Xmm &xm0, &xm1, &xm2, &xm3, &xm4, &xm5, &xm6, &xm7; + const Ymm &ym0, &ym1, &ym2, &ym3, &ym4, &ym5, &ym6, &ym7; + const Zmm &zm0, &zm1, &zm2, &zm3, &zm4, &zm5, &zm6, &zm7; + const Reg32 eax, ecx, edx, ebx, esp, ebp, esi, edi; + const Reg16 ax, cx, dx, bx, sp, bp, si, di; + const Reg8 al, cl, dl, bl, ah, ch, dh, bh; + const AddressFrame ptr, byte, word, dword, qword, xword, yword, zword; // xword is same as oword of NASM + const AddressFrame ptr_b, xword_b, yword_b, zword_b; // broadcast such as {1to2}, {1to4}, {1to8}, {1to16}, {b} + const Fpu st0, st1, st2, st3, st4, st5, st6, st7; + const Opmask k0, k1, k2, k3, k4, k5, k6, k7; + const BoundsReg bnd0, bnd1, bnd2, bnd3; + const EvexModifierRounding T_sae, T_rn_sae, T_rd_sae, T_ru_sae, T_rz_sae; // {sae}, {rn-sae}, {rd-sae}, {ru-sae}, {rz-sae} + const EvexModifierZero T_z; // {z} + const ApxFlagNF T_nf; + const ApxFlagZU T_zu; +#ifdef XBYAK64 + const Reg64 rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15; + const Reg64 r16, r17, r18, r19, r20, r21, r22, r23, r24, r25, r26, r27, r28, r29, r30, r31; + const Reg32 r8d, r9d, r10d, r11d, r12d, r13d, r14d, r15d; + const Reg32 r16d, r17d, r18d, r19d, r20d, r21d, r22d, r23d, r24d, r25d, r26d, r27d, r28d, r29d, r30d, r31d; + const Reg16 r8w, r9w, r10w, r11w, r12w, r13w, r14w, r15w; + const Reg16 r16w, r17w, r18w, r19w, r20w, r21w, r22w, r23w, r24w, r25w, r26w, r27w, r28w, r29w, r30w, r31w; + const Reg8 r8b, r9b, r10b, r11b, r12b, r13b, r14b, r15b; + const Reg8 r16b, r17b, r18b, r19b, r20b, r21b, r22b, r23b, r24b, r25b, r26b, r27b, r28b, r29b, r30b, r31b; + const Reg8 spl, bpl, sil, dil; + const Xmm xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15; + const Xmm xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23; + const Xmm xmm24, xmm25, xmm26, xmm27, xmm28, xmm29, xmm30, xmm31; + const Ymm ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15; + const Ymm ymm16, ymm17, ymm18, ymm19, ymm20, ymm21, ymm22, ymm23; + const Ymm ymm24, ymm25, ymm26, ymm27, ymm28, ymm29, ymm30, ymm31; + const Zmm zmm8, zmm9, zmm10, zmm11, zmm12, zmm13, zmm14, zmm15; + const Zmm zmm16, zmm17, zmm18, zmm19, zmm20, zmm21, zmm22, zmm23; + const Zmm zmm24, zmm25, zmm26, zmm27, zmm28, zmm29, zmm30, zmm31; + const Tmm tmm0, tmm1, tmm2, tmm3, tmm4, tmm5, tmm6, tmm7; + const Xmm &xm8, &xm9, &xm10, &xm11, &xm12, &xm13, &xm14, &xm15; // for my convenience + const Xmm &xm16, &xm17, &xm18, &xm19, &xm20, &xm21, &xm22, &xm23; + const Xmm &xm24, &xm25, &xm26, &xm27, &xm28, &xm29, &xm30, &xm31; + const Ymm &ym8, &ym9, &ym10, &ym11, &ym12, &ym13, &ym14, &ym15; + const Ymm &ym16, &ym17, &ym18, &ym19, &ym20, &ym21, &ym22, &ym23; + const Ymm &ym24, &ym25, &ym26, &ym27, &ym28, &ym29, &ym30, &ym31; + const Zmm &zm8, &zm9, &zm10, &zm11, &zm12, &zm13, &zm14, &zm15; + const Zmm &zm16, &zm17, &zm18, &zm19, &zm20, &zm21, &zm22, &zm23; + const Zmm &zm24, &zm25, &zm26, &zm27, &zm28, &zm29, &zm30, &zm31; + const RegRip rip; +#endif +#ifndef XBYAK_DISABLE_SEGMENT + const Segment es, cs, ss, ds, fs, gs; +#endif +private: + bool isDefaultJmpNEAR_; + PreferredEncoding defaultEncoding_[2]; // 0:vnni, 1:vmpsadbw +public: + void L(const std::string& label) { labelMgr_.defineSlabel(label); } + void L(Label& label) { labelMgr_.defineClabel(label); } + Label L() { Label label; L(label); return label; } + void inLocalLabel() { labelMgr_.enterLocal(); } + void outLocalLabel() { labelMgr_.leaveLocal(); } + /* + assign src to dst + require + dst : does not used by L() + src : used by L() + */ + void assignL(Label& dst, const Label& src) { labelMgr_.assign(dst, src); } + /* + put address of label to buffer + @note the put size is 4(32-bit), 8(64-bit) + */ + void putL(std::string label) { putL_inner(label); } + void putL(const Label& label) { putL_inner(label); } + + // set default type of `jmp` of undefined label to T_NEAR + void setDefaultJmpNEAR(bool isNear) { isDefaultJmpNEAR_ = isNear; } + void jmp(const Operand& op, LabelType type = T_AUTO) { opJmpOp(op, type, 4); } + void jmp(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); } + void jmp(const char *label, LabelType type = T_AUTO) { jmp(std::string(label), type); } + void jmp(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); } + void jmp(const void *addr, LabelType type = T_AUTO) { opJmpAbs(addr, type, 0xEB, 0xE9); } + + void call(const Operand& op, LabelType type = T_AUTO) { opJmpOp(op, type, 2); } + // call(string label), not const std::string& + void call(std::string label) { opJmp(label, T_NEAR, 0, 0xE8, 0); } + void call(const char *label) { call(std::string(label)); } + void call(const Label& label) { opJmp(label, T_NEAR, 0, 0xE8, 0); } + // call(function pointer) +#ifdef XBYAK_VARIADIC_TEMPLATE + template + void call(Ret(*func)(Params...)) { call(reinterpret_cast(func)); } +#endif + void call(const void *addr) { opJmpAbs(addr, T_NEAR, 0, 0xE8); } + + void test(const Operand& op, const Reg& reg) + { + opRO(reg, op, 0, 0x84, op.getKind() == reg.getKind()); + } + void test(const Operand& op, uint32_t imm) + { + verifyMemHasSize(op); + int immSize = (std::min)(op.getBit() / 8, 4U); + if (op.isREG() && op.getIdx() == 0) { // al, ax, eax + rex(op); + db(0xA8 | (op.isBit(8) ? 0 : 1)); + } else { + opRext(op, 0, 0, 0, 0xF6, false, immSize); + } + db(imm, immSize); + } + void imul(const Reg& reg, const Operand& op, int imm) + { + int s = inner::IsInDisp8(imm) ? 1 : 0; + int immSize = s ? 1 : reg.isREG(16) ? 2 : 4; + uint8_t code = uint8_t(0x69 | (s << 1)); + if (!opROO(Reg(), op, reg, T_APX|T_NF|T_ZU, code, immSize)) { + opRO(reg, op, 0, code, reg.getKind() == op.getKind(), immSize); + } + db(imm, immSize); + } + void push(const Operand& op) { opPushPop(op, 0xFF, 6, 0x50); } + void pop(const Operand& op) { opPushPop(op, 0x8F, 0, 0x58); } + void push(const AddressFrame& af, uint32_t imm) + { + if (af.bit_ == 8) { + db(0x6A); db(imm); + } else if (af.bit_ == 16) { + db(0x66); db(0x68); dw(imm); + } else { + db(0x68); dd(imm); + } + } + /* use "push(word, 4)" if you want "push word 4" */ + void push(uint32_t imm) + { + if (inner::IsInDisp8(imm)) { + push(byte, imm); + } else { + push(dword, imm); + } + } + void mov(const Operand& op1, const Operand& op2) + { + const Reg *reg = 0; + const Address *addr = 0; + uint8_t code = 0; + if (op1.isREG() && op1.getIdx() == 0 && op2.isMEM()) { // mov eax|ax|al, [disp] + reg = &op1.getReg(); + addr= &op2.getAddress(); + code = 0xA0; + } else + if (op1.isMEM() && op2.isREG() && op2.getIdx() == 0) { // mov [disp], eax|ax|al + reg = &op2.getReg(); + addr= &op1.getAddress(); + code = 0xA2; + } +#ifdef XBYAK64 + if (addr && addr->is64bitDisp()) { + if (code) { + rex(*reg); + db(op1.isREG(8) ? 0xA0 : op1.isREG() ? 0xA1 : op2.isREG(8) ? 0xA2 : 0xA3); + if (addr->getLabel()) { + putL_inner(*addr->getLabel(), false, addr->getDisp() - addr->immSize, 8); + } else { + db(addr->getDisp(), 8); + } + } else { + XBYAK_THROW(ERR_BAD_COMBINATION) + } + } else +#else + if (code && addr->isOnlyDisp()) { + rex(*reg, *addr); + db(code | (reg->isBit(8) ? 0 : 1)); + if (addr->getLabel()) { + putL_inner(*addr->getLabel(), false, addr->getDisp() - addr->immSize); + } else { + dd(static_cast(addr->getDisp())); + } + } else +#endif + { + opRO_MR(op1, op2, 0x88); + } + } + void mov(const Operand& op, uint64_t imm) + { + if (op.isREG()) { + const int size = mov_imm(op.getReg(), imm); + db(imm, size); + } else if (op.isMEM()) { + verifyMemHasSize(op); + int immSize = op.getBit() / 8; + if (immSize <= 4) { + int64_t s = int64_t(imm) >> (immSize * 8); + if (s != 0 && s != -1) XBYAK_THROW(ERR_IMM_IS_TOO_BIG) + } else { + if (!inner::IsInInt32(imm)) XBYAK_THROW(ERR_IMM_IS_TOO_BIG) + immSize = 4; + } + opMR(op.getAddress(immSize), Reg(0, Operand::REG, op.getBit()), 0, 0xC6); + db(static_cast(imm), immSize); + } else { + XBYAK_THROW(ERR_BAD_COMBINATION) + } + } + + // The template is used to avoid ambiguity when the 2nd argument is 0. + // When the 2nd argument is 0 the call goes to + // `void mov(const Operand& op, uint64_t imm)`. + template + void mov(const T1&, const T2 *) { T1::unexpected; } + void mov(const NativeReg& reg, const Label& label) + { + mov_imm(reg, dummyAddr); + putL(label); + } + void xchg(const Operand& op1, const Operand& op2) + { + const Operand *p1 = &op1, *p2 = &op2; + if (p1->isMEM() || (p2->isREG(16 | i32e) && p2->getIdx() == 0)) { + p1 = &op2; p2 = &op1; + } + if (p1->isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) + if (p2->isREG() && (p1->isREG(16 | i32e) && p1->getIdx() == 0) +#ifdef XBYAK64 + && (p2->getIdx() != 0 || !p1->isREG(32)) +#endif + ) { + rex(*p2, *p1); db(0x90 | (p2->getIdx() & 7)); + return; + } + if (p1->isREG() && p2->isREG()) std::swap(p1, p2); // adapt to NASM 2.16.03 behavior to pass tests + opRO(static_cast(*p1), *p2, 0, 0x86 | (p1->isBit(8) ? 0 : 1), (p1->isREG() && (p1->getBit() == p2->getBit()))); + } + +#ifndef XBYAK_DISABLE_SEGMENT + void push(const Segment& seg) + { + switch (seg.getIdx()) { + case Segment::es: db(0x06); break; + case Segment::cs: db(0x0E); break; + case Segment::ss: db(0x16); break; + case Segment::ds: db(0x1E); break; + case Segment::fs: db(0x0F); db(0xA0); break; + case Segment::gs: db(0x0F); db(0xA8); break; + default: + assert(0); + } + } + void pop(const Segment& seg) + { + switch (seg.getIdx()) { + case Segment::es: db(0x07); break; + case Segment::cs: XBYAK_THROW(ERR_BAD_COMBINATION) + case Segment::ss: db(0x17); break; + case Segment::ds: db(0x1F); break; + case Segment::fs: db(0x0F); db(0xA1); break; + case Segment::gs: db(0x0F); db(0xA9); break; + default: + assert(0); + } + } + void putSeg(const Segment& seg) + { + switch (seg.getIdx()) { + case Segment::es: db(0x2E); break; + case Segment::cs: db(0x36); break; + case Segment::ss: db(0x3E); break; + case Segment::ds: db(0x26); break; + case Segment::fs: db(0x64); break; + case Segment::gs: db(0x65); break; + default: + assert(0); + } + } + void mov(const Operand& op, const Segment& seg) + { + opRO(Reg8(seg.getIdx()), op, T_ALLOW_DIFF_SIZE | T_ALLOW_ABCDH, 0x8C, op.isREG(16|i32e)); + } + void mov(const Segment& seg, const Operand& op) + { + opRO(Reg8(seg.getIdx()), op.isREG(16|i32e) ? static_cast(op.getReg().cvt32()) : op, T_ALLOW_DIFF_SIZE | T_ALLOW_ABCDH, 0x8E, op.isREG(16|i32e)); + } +#endif + + enum { NONE = 256 }; + // constructor + CodeGenerator(size_t maxSize = DEFAULT_MAX_CODE_SIZE, void *userPtr = 0, Allocator *allocator = 0) + : CodeArray(maxSize, userPtr, allocator) + , mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7) + , xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7) + , ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7) + , zmm0(0), zmm1(1), zmm2(2), zmm3(3), zmm4(4), zmm5(5), zmm6(6), zmm7(7) + // for my convenience + , xm0(xmm0), xm1(xmm1), xm2(xmm2), xm3(xmm3), xm4(xmm4), xm5(xmm5), xm6(xmm6), xm7(xmm7) + , ym0(ymm0), ym1(ymm1), ym2(ymm2), ym3(ymm3), ym4(ymm4), ym5(ymm5), ym6(ymm6), ym7(ymm7) + , zm0(zmm0), zm1(zmm1), zm2(zmm2), zm3(zmm3), zm4(zmm4), zm5(zmm5), zm6(zmm6), zm7(zmm7) + + , eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI) + , ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI) + , al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH) + , ptr(0), byte(8), word(16), dword(32), qword(64), xword(128), yword(256), zword(512) + , ptr_b(0, true), xword_b(128, true), yword_b(256, true), zword_b(512, true) + , st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7) + , k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7) + , bnd0(0), bnd1(1), bnd2(2), bnd3(3) + , T_sae(EvexModifierRounding::T_SAE), T_rn_sae(EvexModifierRounding::T_RN_SAE), T_rd_sae(EvexModifierRounding::T_RD_SAE), T_ru_sae(EvexModifierRounding::T_RU_SAE), T_rz_sae(EvexModifierRounding::T_RZ_SAE) + , T_z() + , T_nf() + , T_zu() +#ifdef XBYAK64 + , rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15) + , r16(Operand::R16), r17(Operand::R17), r18(Operand::R18), r19(Operand::R19), r20(Operand::R20), r21(Operand::R21), r22(Operand::R22), r23(Operand::R23), r24(Operand::R24), r25(Operand::R25), r26(Operand::R26), r27(Operand::R27), r28(Operand::R28), r29(Operand::R29), r30(Operand::R30), r31(Operand::R31) + , r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15) + , r16d(Operand::R16D), r17d(Operand::R17D), r18d(Operand::R18D), r19d(Operand::R19D), r20d(Operand::R20D), r21d(Operand::R21D), r22d(Operand::R22D), r23d(Operand::R23D), r24d(Operand::R24D), r25d(Operand::R25D), r26d(Operand::R26D), r27d(Operand::R27D), r28d(Operand::R28D), r29d(Operand::R29D), r30d(Operand::R30D), r31d(Operand::R31D) + , r8w(8), r9w(9), r10w(10), r11w(11), r12w(12), r13w(13), r14w(14), r15w(15) + , r16w(Operand::R16W), r17w(Operand::R17W), r18w(Operand::R18W), r19w(Operand::R19W), r20w(Operand::R20W), r21w(Operand::R21W), r22w(Operand::R22W), r23w(Operand::R23W), r24w(Operand::R24W), r25w(Operand::R25W), r26w(Operand::R26W), r27w(Operand::R27W), r28w(Operand::R28W), r29w(Operand::R29W), r30w(Operand::R30W), r31w(Operand::R31W) + , r8b(8), r9b(9), r10b(10), r11b(11), r12b(12), r13b(13), r14b(14), r15b(15) + , r16b(Operand::R16B), r17b(Operand::R17B), r18b(Operand::R18B), r19b(Operand::R19B), r20b(Operand::R20B), r21b(Operand::R21B), r22b(Operand::R22B), r23b(Operand::R23B), r24b(Operand::R24B), r25b(Operand::R25B), r26b(Operand::R26B), r27b(Operand::R27B), r28b(Operand::R28B), r29b(Operand::R29B), r30b(Operand::R30B), r31b(Operand::R31B) + , spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true) + , xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15) + , xmm16(16), xmm17(17), xmm18(18), xmm19(19), xmm20(20), xmm21(21), xmm22(22), xmm23(23) + , xmm24(24), xmm25(25), xmm26(26), xmm27(27), xmm28(28), xmm29(29), xmm30(30), xmm31(31) + , ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15) + , ymm16(16), ymm17(17), ymm18(18), ymm19(19), ymm20(20), ymm21(21), ymm22(22), ymm23(23) + , ymm24(24), ymm25(25), ymm26(26), ymm27(27), ymm28(28), ymm29(29), ymm30(30), ymm31(31) + , zmm8(8), zmm9(9), zmm10(10), zmm11(11), zmm12(12), zmm13(13), zmm14(14), zmm15(15) + , zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm20(20), zmm21(21), zmm22(22), zmm23(23) + , zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31) + , tmm0(0), tmm1(1), tmm2(2), tmm3(3), tmm4(4), tmm5(5), tmm6(6), tmm7(7) + // for my convenience + , xm8(xmm8), xm9(xmm9), xm10(xmm10), xm11(xmm11), xm12(xmm12), xm13(xmm13), xm14(xmm14), xm15(xmm15) + , xm16(xmm16), xm17(xmm17), xm18(xmm18), xm19(xmm19), xm20(xmm20), xm21(xmm21), xm22(xmm22), xm23(xmm23) + , xm24(xmm24), xm25(xmm25), xm26(xmm26), xm27(xmm27), xm28(xmm28), xm29(xmm29), xm30(xmm30), xm31(xmm31) + , ym8(ymm8), ym9(ymm9), ym10(ymm10), ym11(ymm11), ym12(ymm12), ym13(ymm13), ym14(ymm14), ym15(ymm15) + , ym16(ymm16), ym17(ymm17), ym18(ymm18), ym19(ymm19), ym20(ymm20), ym21(ymm21), ym22(ymm22), ym23(ymm23) + , ym24(ymm24), ym25(ymm25), ym26(ymm26), ym27(ymm27), ym28(ymm28), ym29(ymm29), ym30(ymm30), ym31(ymm31) + , zm8(zmm8), zm9(zmm9), zm10(zmm10), zm11(zmm11), zm12(zmm12), zm13(zmm13), zm14(zmm14), zm15(zmm15) + , zm16(zmm16), zm17(zmm17), zm18(zmm18), zm19(zmm19), zm20(zmm20), zm21(zmm21), zm22(zmm22), zm23(zmm23) + , zm24(zmm24), zm25(zmm25), zm26(zmm26), zm27(zmm27), zm28(zmm28), zm29(zmm29), zm30(zmm30), zm31(zmm31) + , rip() +#endif +#ifndef XBYAK_DISABLE_SEGMENT + , es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs) +#endif + , isDefaultJmpNEAR_(false) + { + setDefaultEncoding(); + setDefaultEncodingAVX10(); + labelMgr_.set(this); + } + void reset() + { + ClearError(); + resetSize(); + labelMgr_.reset(); + labelMgr_.set(this); + } + bool hasUndefinedLabel() const { return labelMgr_.hasUndefSlabel() || labelMgr_.hasUndefClabel(); } + /* + MUST call ready() to complete generating code if you use AutoGrow mode. + It is not necessary for the other mode if hasUndefinedLabel() is true. + */ + void ready(ProtectMode mode = PROTECT_RWE) + { + if (hasUndefinedLabel()) XBYAK_THROW(ERR_LABEL_IS_NOT_FOUND) + if (isAutoGrow()) { + calcJmpAddress(); + if (useProtect()) setProtectMode(mode); + } + } + // set read/exec + void readyRE() { return ready(PROTECT_RE); } +#ifdef XBYAK_TEST + void dump(bool doClear = true) + { + CodeArray::dump(); + if (doClear) size_ = 0; + } +#endif + +#ifdef XBYAK_UNDEF_JNL + #undef jnl +#endif + + // set default encoding of VNNI + // EvexEncoding : AVX512_VNNI, VexEncoding : AVX-VNNI + void setDefaultEncoding(PreferredEncoding enc = EvexEncoding) + { + if (enc != VexEncoding && enc != EvexEncoding) XBYAK_THROW(ERR_BAD_ENCODING_MODE) + defaultEncoding_[0] = enc; + } + // default : PreferredEncoding : AVX-VNNI-INT8/AVX512-FP16 + void setDefaultEncodingAVX10(PreferredEncoding enc = PreAVX10v2Encoding) + { + if (enc != PreAVX10v2Encoding && enc != AVX10v2Encoding) XBYAK_THROW(ERR_BAD_ENCODING_MODE) + defaultEncoding_[1] = enc; + } + + void bswap(const Reg32e& r) + { + int idx = r.getIdx(); + uint8_t rex = (r.isREG(64) ? 8 : 0) | ((idx & 8) ? 1 : 0); + if (idx >= 16) { + db(0xD5); db((1<<7) | (idx & 16) | rex); + } else { + if (rex) db(0x40 | rex); + db(0x0F); + } + db(0xC8 + (idx & 7)); + } + void vmovd(const Operand& op1, const Operand& op2, PreferredEncoding enc = DefaultEncoding) + { + const uint64_t typeTbl[] = { + T_EVEX|T_66|T_0F|T_W0|T_N4, T_EVEX|T_66|T_0F|T_W0|T_N4, // legacy, avx, avx512 + T_MUST_EVEX|T_66|T_0F|T_N4, T_MUST_EVEX|T_F3|T_0F|T_N4, // avx10.2 + }; + const int codeTbl[] = { 0x7E, 0x6E, 0xD6, 0x7E }; + opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, enc, 32); + } + void vmovw(const Operand& op1, const Operand& op2, PreferredEncoding enc = DefaultEncoding) + { + const uint64_t typeTbl[] = { + T_MUST_EVEX|T_66|T_MAP5|T_N2, T_MUST_EVEX|T_66|T_MAP5|T_N2, // avx512-fp16 + T_MUST_EVEX|T_F3|T_MAP5|T_N2, T_MUST_EVEX|T_F3|T_MAP5|T_N2, // avx10.2 + }; + const int codeTbl[] = { 0x7E, 0x6E, 0x7E, 0x6E }; + opAVX10ZeroExt(op1, op2, typeTbl, codeTbl, enc, 16|32|64); + } + /* + use single byte nop if useMultiByteNop = false + */ + void nop(size_t size = 1, bool useMultiByteNop = true) + { + if (!useMultiByteNop) { + for (size_t i = 0; i < size; i++) { + db(0x90); + } + return; + } + /* + Intel Architectures Software Developer's Manual Volume 2 + recommended multi-byte sequence of NOP instruction + AMD and Intel seem to agree on the same sequences for up to 9 bytes: + https://support.amd.com/TechDocs/55723_SOG_Fam_17h_Processors_3.00.pdf + */ + static const uint8_t nopTbl[9][9] = { + {0x90}, + {0x66, 0x90}, + {0x0F, 0x1F, 0x00}, + {0x0F, 0x1F, 0x40, 0x00}, + {0x0F, 0x1F, 0x44, 0x00, 0x00}, + {0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00}, + {0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00}, + {0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + {0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}, + }; + const size_t n = sizeof(nopTbl) / sizeof(nopTbl[0]); + while (size > 0) { + size_t len = (std::min)(n, size); + const uint8_t *seq = nopTbl[len - 1]; + db(seq, len); + size -= len; + } + } +#ifndef XBYAK_DONT_READ_LIST +#include "xbyak_mnemonic.h" + /* + use single byte nop if useMultiByteNop = false + */ + void align(size_t x = 16, bool useMultiByteNop = true) + { + if (x == 1) return; + if (x < 1 || (x & (x - 1))) XBYAK_THROW(ERR_BAD_ALIGN) + if (isAutoGrow() && inner::getPageSize() % x != 0) XBYAK_THROW(ERR_BAD_ALIGN) + size_t remain = size_t(getCurr()) % x; + if (remain) { + nop(x - remain, useMultiByteNop); + } + } +#endif +}; + +template <> +inline void CodeGenerator::mov(const NativeReg& reg, const char *label) // can't use std::string +{ + assert(label); + mov_imm(reg, dummyAddr); + putL(label); +} + +namespace util { +static const XBYAK_CONSTEXPR Mmx mm0(0), mm1(1), mm2(2), mm3(3), mm4(4), mm5(5), mm6(6), mm7(7); +static const XBYAK_CONSTEXPR Xmm xmm0(0), xmm1(1), xmm2(2), xmm3(3), xmm4(4), xmm5(5), xmm6(6), xmm7(7); +static const XBYAK_CONSTEXPR Ymm ymm0(0), ymm1(1), ymm2(2), ymm3(3), ymm4(4), ymm5(5), ymm6(6), ymm7(7); +static const XBYAK_CONSTEXPR Zmm zmm0(0), zmm1(1), zmm2(2), zmm3(3), zmm4(4), zmm5(5), zmm6(6), zmm7(7); +static const XBYAK_CONSTEXPR Reg32 eax(Operand::EAX), ecx(Operand::ECX), edx(Operand::EDX), ebx(Operand::EBX), esp(Operand::ESP), ebp(Operand::EBP), esi(Operand::ESI), edi(Operand::EDI); +static const XBYAK_CONSTEXPR Reg16 ax(Operand::AX), cx(Operand::CX), dx(Operand::DX), bx(Operand::BX), sp(Operand::SP), bp(Operand::BP), si(Operand::SI), di(Operand::DI); +static const XBYAK_CONSTEXPR Reg8 al(Operand::AL), cl(Operand::CL), dl(Operand::DL), bl(Operand::BL), ah(Operand::AH), ch(Operand::CH), dh(Operand::DH), bh(Operand::BH); +static const XBYAK_CONSTEXPR AddressFrame ptr(0), byte(8), word(16), dword(32), qword(64), xword(128), yword(256), zword(512); +static const XBYAK_CONSTEXPR AddressFrame ptr_b(0, true), xword_b(128, true), yword_b(256, true), zword_b(512, true); +static const XBYAK_CONSTEXPR Fpu st0(0), st1(1), st2(2), st3(3), st4(4), st5(5), st6(6), st7(7); +static const XBYAK_CONSTEXPR Opmask k0(0), k1(1), k2(2), k3(3), k4(4), k5(5), k6(6), k7(7); +static const XBYAK_CONSTEXPR BoundsReg bnd0(0), bnd1(1), bnd2(2), bnd3(3); +static const XBYAK_CONSTEXPR EvexModifierRounding T_sae(EvexModifierRounding::T_SAE), T_rn_sae(EvexModifierRounding::T_RN_SAE), T_rd_sae(EvexModifierRounding::T_RD_SAE), T_ru_sae(EvexModifierRounding::T_RU_SAE), T_rz_sae(EvexModifierRounding::T_RZ_SAE); +static const XBYAK_CONSTEXPR EvexModifierZero T_z; +#ifdef XBYAK64 +static const XBYAK_CONSTEXPR Reg64 rax(Operand::RAX), rcx(Operand::RCX), rdx(Operand::RDX), rbx(Operand::RBX), rsp(Operand::RSP), rbp(Operand::RBP), rsi(Operand::RSI), rdi(Operand::RDI), r8(Operand::R8), r9(Operand::R9), r10(Operand::R10), r11(Operand::R11), r12(Operand::R12), r13(Operand::R13), r14(Operand::R14), r15(Operand::R15); +static const XBYAK_CONSTEXPR Reg64 r16(16), r17(17), r18(18), r19(19), r20(20), r21(21), r22(22), r23(23), r24(24), r25(25), r26(26), r27(27), r28(28), r29(29), r30(30), r31(31); +static const XBYAK_CONSTEXPR Reg32 r8d(8), r9d(9), r10d(10), r11d(11), r12d(12), r13d(13), r14d(14), r15d(15); +static const XBYAK_CONSTEXPR Reg32 r16d(16), r17d(17), r18d(18), r19d(19), r20d(20), r21d(21), r22d(22), r23d(23), r24d(24), r25d(25), r26d(26), r27d(27), r28d(28), r29d(29), r30d(30), r31d(31); +static const XBYAK_CONSTEXPR Reg16 r8w(8), r9w(9), r10w(10), r11w(11), r12w(12), r13w(13), r14w(14), r15w(15); +static const XBYAK_CONSTEXPR Reg16 r16w(16), r17w(17), r18w(18), r19w(19), r20w(20), r21w(21), r22w(22), r23w(23), r24w(24), r25w(25), r26w(26), r27w(27), r28w(28), r29w(29), r30w(30), r31w(31); +static const XBYAK_CONSTEXPR Reg8 r8b(8), r9b(9), r10b(10), r11b(11), r12b(12), r13b(13), r14b(14), r15b(15), spl(Operand::SPL, true), bpl(Operand::BPL, true), sil(Operand::SIL, true), dil(Operand::DIL, true); +static const XBYAK_CONSTEXPR Reg8 r16b(16), r17b(17), r18b(18), r19b(19), r20b(20), r21b(21), r22b(22), r23b(23), r24b(24), r25b(25), r26b(26), r27b(27), r28b(28), r29b(29), r30b(30), r31b(31); +static const XBYAK_CONSTEXPR Xmm xmm8(8), xmm9(9), xmm10(10), xmm11(11), xmm12(12), xmm13(13), xmm14(14), xmm15(15); +static const XBYAK_CONSTEXPR Xmm xmm16(16), xmm17(17), xmm18(18), xmm19(19), xmm20(20), xmm21(21), xmm22(22), xmm23(23); +static const XBYAK_CONSTEXPR Xmm xmm24(24), xmm25(25), xmm26(26), xmm27(27), xmm28(28), xmm29(29), xmm30(30), xmm31(31); +static const XBYAK_CONSTEXPR Ymm ymm8(8), ymm9(9), ymm10(10), ymm11(11), ymm12(12), ymm13(13), ymm14(14), ymm15(15); +static const XBYAK_CONSTEXPR Ymm ymm16(16), ymm17(17), ymm18(18), ymm19(19), ymm20(20), ymm21(21), ymm22(22), ymm23(23); +static const XBYAK_CONSTEXPR Ymm ymm24(24), ymm25(25), ymm26(26), ymm27(27), ymm28(28), ymm29(29), ymm30(30), ymm31(31); +static const XBYAK_CONSTEXPR Zmm zmm8(8), zmm9(9), zmm10(10), zmm11(11), zmm12(12), zmm13(13), zmm14(14), zmm15(15); +static const XBYAK_CONSTEXPR Zmm zmm16(16), zmm17(17), zmm18(18), zmm19(19), zmm20(20), zmm21(21), zmm22(22), zmm23(23); +static const XBYAK_CONSTEXPR Zmm zmm24(24), zmm25(25), zmm26(26), zmm27(27), zmm28(28), zmm29(29), zmm30(30), zmm31(31); +static const XBYAK_CONSTEXPR Zmm tmm0(0), tmm1(1), tmm2(2), tmm3(3), tmm4(4), tmm5(5), tmm6(6), tmm7(7); +static const XBYAK_CONSTEXPR RegRip rip; +static const XBYAK_CONSTEXPR ApxFlagNF T_nf; +static const XBYAK_CONSTEXPR ApxFlagZU T_zu; +#endif +#ifndef XBYAK_DISABLE_SEGMENT +static const XBYAK_CONSTEXPR Segment es(Segment::es), cs(Segment::cs), ss(Segment::ss), ds(Segment::ds), fs(Segment::fs), gs(Segment::gs); +#endif +} // util + +#ifdef _MSC_VER + #pragma warning(pop) +#endif + +#if defined(__GNUC__) && !defined(__clang__) + #pragma GCC diagnostic pop +#endif + +} // end of namespace + +#endif // XBYAK_XBYAK_H_ diff --git a/wx_key/vendor/xbyak/xbyak/xbyak_bin2hex.h b/wx_key/vendor/xbyak/xbyak/xbyak_bin2hex.h new file mode 100644 index 0000000..69ecdbf --- /dev/null +++ b/wx_key/vendor/xbyak/xbyak/xbyak_bin2hex.h @@ -0,0 +1,258 @@ +enum { + B00000000= 0, + B00000001= 1, + B00000010= 2, + B00000011= 3, + B00000100= 4, + B00000101= 5, + B00000110= 6, + B00000111= 7, + B00001000= 8, + B00001001= 9, + B00001010= 10, + B00001011= 11, + B00001100= 12, + B00001101= 13, + B00001110= 14, + B00001111= 15, + B00010000= 16, + B00010001= 17, + B00010010= 18, + B00010011= 19, + B00010100= 20, + B00010101= 21, + B00010110= 22, + B00010111= 23, + B00011000= 24, + B00011001= 25, + B00011010= 26, + B00011011= 27, + B00011100= 28, + B00011101= 29, + B00011110= 30, + B00011111= 31, + B00100000= 32, + B00100001= 33, + B00100010= 34, + B00100011= 35, + B00100100= 36, + B00100101= 37, + B00100110= 38, + B00100111= 39, + B00101000= 40, + B00101001= 41, + B00101010= 42, + B00101011= 43, + B00101100= 44, + B00101101= 45, + B00101110= 46, + B00101111= 47, + B00110000= 48, + B00110001= 49, + B00110010= 50, + B00110011= 51, + B00110100= 52, + B00110101= 53, + B00110110= 54, + B00110111= 55, + B00111000= 56, + B00111001= 57, + B00111010= 58, + B00111011= 59, + B00111100= 60, + B00111101= 61, + B00111110= 62, + B00111111= 63, + B01000000= 64, + B01000001= 65, + B01000010= 66, + B01000011= 67, + B01000100= 68, + B01000101= 69, + B01000110= 70, + B01000111= 71, + B01001000= 72, + B01001001= 73, + B01001010= 74, + B01001011= 75, + B01001100= 76, + B01001101= 77, + B01001110= 78, + B01001111= 79, + B01010000= 80, + B01010001= 81, + B01010010= 82, + B01010011= 83, + B01010100= 84, + B01010101= 85, + B01010110= 86, + B01010111= 87, + B01011000= 88, + B01011001= 89, + B01011010= 90, + B01011011= 91, + B01011100= 92, + B01011101= 93, + B01011110= 94, + B01011111= 95, + B01100000= 96, + B01100001= 97, + B01100010= 98, + B01100011= 99, + B01100100= 100, + B01100101= 101, + B01100110= 102, + B01100111= 103, + B01101000= 104, + B01101001= 105, + B01101010= 106, + B01101011= 107, + B01101100= 108, + B01101101= 109, + B01101110= 110, + B01101111= 111, + B01110000= 112, + B01110001= 113, + B01110010= 114, + B01110011= 115, + B01110100= 116, + B01110101= 117, + B01110110= 118, + B01110111= 119, + B01111000= 120, + B01111001= 121, + B01111010= 122, + B01111011= 123, + B01111100= 124, + B01111101= 125, + B01111110= 126, + B01111111= 127, + B10000000= 128, + B10000001= 129, + B10000010= 130, + B10000011= 131, + B10000100= 132, + B10000101= 133, + B10000110= 134, + B10000111= 135, + B10001000= 136, + B10001001= 137, + B10001010= 138, + B10001011= 139, + B10001100= 140, + B10001101= 141, + B10001110= 142, + B10001111= 143, + B10010000= 144, + B10010001= 145, + B10010010= 146, + B10010011= 147, + B10010100= 148, + B10010101= 149, + B10010110= 150, + B10010111= 151, + B10011000= 152, + B10011001= 153, + B10011010= 154, + B10011011= 155, + B10011100= 156, + B10011101= 157, + B10011110= 158, + B10011111= 159, + B10100000= 160, + B10100001= 161, + B10100010= 162, + B10100011= 163, + B10100100= 164, + B10100101= 165, + B10100110= 166, + B10100111= 167, + B10101000= 168, + B10101001= 169, + B10101010= 170, + B10101011= 171, + B10101100= 172, + B10101101= 173, + B10101110= 174, + B10101111= 175, + B10110000= 176, + B10110001= 177, + B10110010= 178, + B10110011= 179, + B10110100= 180, + B10110101= 181, + B10110110= 182, + B10110111= 183, + B10111000= 184, + B10111001= 185, + B10111010= 186, + B10111011= 187, + B10111100= 188, + B10111101= 189, + B10111110= 190, + B10111111= 191, + B11000000= 192, + B11000001= 193, + B11000010= 194, + B11000011= 195, + B11000100= 196, + B11000101= 197, + B11000110= 198, + B11000111= 199, + B11001000= 200, + B11001001= 201, + B11001010= 202, + B11001011= 203, + B11001100= 204, + B11001101= 205, + B11001110= 206, + B11001111= 207, + B11010000= 208, + B11010001= 209, + B11010010= 210, + B11010011= 211, + B11010100= 212, + B11010101= 213, + B11010110= 214, + B11010111= 215, + B11011000= 216, + B11011001= 217, + B11011010= 218, + B11011011= 219, + B11011100= 220, + B11011101= 221, + B11011110= 222, + B11011111= 223, + B11100000= 224, + B11100001= 225, + B11100010= 226, + B11100011= 227, + B11100100= 228, + B11100101= 229, + B11100110= 230, + B11100111= 231, + B11101000= 232, + B11101001= 233, + B11101010= 234, + B11101011= 235, + B11101100= 236, + B11101101= 237, + B11101110= 238, + B11101111= 239, + B11110000= 240, + B11110001= 241, + B11110010= 242, + B11110011= 243, + B11110100= 244, + B11110101= 245, + B11110110= 246, + B11110111= 247, + B11111000= 248, + B11111001= 249, + B11111010= 250, + B11111011= 251, + B11111100= 252, + B11111101= 253, + B11111110= 254, + B11111111= 255 +}; diff --git a/wx_key/vendor/xbyak/xbyak/xbyak_mnemonic.h b/wx_key/vendor/xbyak/xbyak/xbyak_mnemonic.h new file mode 100644 index 0000000..5f75098 --- /dev/null +++ b/wx_key/vendor/xbyak/xbyak/xbyak_mnemonic.h @@ -0,0 +1,2706 @@ +const char *getVersionString() const { return "7.30"; } +void aadd(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38, 0x0FC, T_APX); } +void aand(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38|T_66, 0x0FC, T_APX|T_66); } +void adc(const Operand& op, uint32_t imm) { opOI(op, imm, 0x10, 2); } +void adc(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x10); } +void adc(const Reg& d, const Operand& op, uint32_t imm) { opROI(d, op, imm, T_NONE, 2); } +void adc(const Reg& d, const Operand& op1, const Operand& op2) { opROO(d, op1, op2, T_NONE, 0x10); } +void adcx(const Reg32e& d, const Reg32e& reg, const Operand& op) { opROO(d, op, reg, T_66, 0x66); } +void adcx(const Reg32e& reg, const Operand& op) { if (!reg.isREG(16|i32e) && reg.getBit() == op.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) if (opROO(Reg(), op, reg, T_66, 0x66)) return; opRO(reg, op, T_66 | T_0F38, 0xF6); } +void add(const Operand& op, uint32_t imm) { opOI(op, imm, 0x00, 0); } +void add(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x00); } +void add(const Reg& d, const Operand& op, uint32_t imm) { opROI(d, op, imm, T_NF|T_CODE1_IF1, 0); } +void add(const Reg& d, const Operand& op1, const Operand& op2) { opROO(d, op1, op2, T_NF|T_CODE1_IF1, 0x00); } +void addpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x58, isXMM_XMMorMEM); } +void addps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x58, isXMM_XMMorMEM); } +void addsd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F2, 0x58, isXMM_XMMorMEM); } +void addss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F3, 0x58, isXMM_XMMorMEM); } +void addsubpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F|T_YMM, 0xD0, isXMM_XMMorMEM); } +void addsubps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F2|T_0F|T_YMM, 0xD0, isXMM_XMMorMEM); } +void adox(const Reg32e& d, const Reg32e& reg, const Operand& op) { opROO(d, op, reg, T_F3, 0x66); } +void adox(const Reg32e& reg, const Operand& op) { if (!reg.isREG(16|i32e) && reg.getBit() == op.getBit()) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) if (opROO(Reg(), op, reg, T_F3, 0x66)) return; opRO(reg, op, T_F3 | T_0F38, 0xF6); } +void aesdec(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F38|T_YMM|T_EVEX, 0xDE, isXMM_XMMorMEM); } +void aesdeclast(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F38|T_YMM|T_EVEX, 0xDF, isXMM_XMMorMEM); } +void aesenc(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F38|T_YMM|T_EVEX, 0xDC, isXMM_XMMorMEM); } +void aesenclast(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F38|T_YMM|T_EVEX, 0xDD, isXMM_XMMorMEM); } +void aesimc(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F38|T_W0, 0xDB, isXMM_XMMorMEM, NONE); } +void aeskeygenassist(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66|T_0F3A, 0xDF, isXMM_XMMorMEM, imm); } +void and_(const Operand& op, uint32_t imm) { opOI(op, imm, 0x20, 4); } +void and_(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x20); } +void and_(const Reg& d, const Operand& op, uint32_t imm) { opROI(d, op, imm, T_NF|T_CODE1_IF1, 4); } +void and_(const Reg& d, const Operand& op1, const Operand& op2) { opROO(d, op1, op2, T_NF|T_CODE1_IF1, 0x20); } +void andn(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opRRO(r1, r2, op, T_APX|T_0F38|T_NF, 0xf2); } +void andnpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x55, isXMM_XMMorMEM); } +void andnps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x55, isXMM_XMMorMEM); } +void andpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x54, isXMM_XMMorMEM); } +void andps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x54, isXMM_XMMorMEM); } +void aor(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38|T_F2, 0x0FC, T_APX|T_F2); } +void axor(const Address& addr, const Reg32e ®) { opMR(addr, reg, T_0F38|T_F3, 0x0FC, T_APX|T_F3); } +void bextr(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opRRO(r1, r2, op, T_APX|T_0F38|T_NF, 0xf7); } +void blendpd(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x0D, isXMM_XMMorMEM, static_cast(imm)); } +void blendps(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x0C, isXMM_XMMorMEM, static_cast(imm)); } +void blendvpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F38, 0x15, isXMM_XMMorMEM, NONE); } +void blendvps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F38, 0x14, isXMM_XMMorMEM, NONE); } +void blsi(const Reg32e& r, const Operand& op) { opRRO(Reg32e(3, r.getBit()), r, op, T_APX|T_0F38|T_NF, 0xf3); } +void blsmsk(const Reg32e& r, const Operand& op) { opRRO(Reg32e(2, r.getBit()), r, op, T_APX|T_0F38|T_NF, 0xf3); } +void blsr(const Reg32e& r, const Operand& op) { opRRO(Reg32e(1, r.getBit()), r, op, T_APX|T_0F38|T_NF, 0xf3); } +void bnd() { db(0xF2); } +void bndcl(const BoundsReg& bnd, const Operand& op) { opRext(op, i32e, bnd.getIdx(), T_F3 | T_0F, 0x1A, !op.isMEM()); } +void bndcn(const BoundsReg& bnd, const Operand& op) { opRext(op, i32e, bnd.getIdx(), T_F2 | T_0F, 0x1B, !op.isMEM()); } +void bndcu(const BoundsReg& bnd, const Operand& op) { opRext(op, i32e, bnd.getIdx(), T_F2 | T_0F, 0x1A, !op.isMEM()); } +void bndldx(const BoundsReg& bnd, const Address& addr) { opMIB(addr, bnd, T_0F, 0x1A); } +void bndmk(const BoundsReg& bnd, const Address& addr) { opMR(addr, bnd, T_F3 | T_0F, 0x1B); } +void bndmov(const Address& addr, const BoundsReg& bnd) { opMR(addr, bnd, T_66 | T_0F, 0x1B); } +void bndmov(const BoundsReg& bnd, const Operand& op) { opRO(bnd, op, T_66 | T_0F, 0x1A, op.isBNDREG()); } +void bndstx(const Address& addr, const BoundsReg& bnd) { opMIB(addr, bnd, T_0F, 0x1B); } +void bsf(const Reg®, const Operand& op) { opRO(reg, op, T_0F, 0xBC, op.isREG(16|i32e)); } +void bsr(const Reg®, const Operand& op) { opRO(reg, op, T_0F, 0xBD, op.isREG(16|i32e)); } +void bt(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xA3, op.isREG(16|i32e) && op.getBit() == reg.getBit()); } +void bt(const Operand& op, uint8_t imm) { opRext(op, 16|i32e, 4, T_0F, 0xba, false, 1); db(imm); } +void btc(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xBB, op.isREG(16|i32e) && op.getBit() == reg.getBit()); } +void btc(const Operand& op, uint8_t imm) { opRext(op, 16|i32e, 7, T_0F, 0xba, false, 1); db(imm); } +void btr(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xB3, op.isREG(16|i32e) && op.getBit() == reg.getBit()); } +void btr(const Operand& op, uint8_t imm) { opRext(op, 16|i32e, 6, T_0F, 0xba, false, 1); db(imm); } +void bts(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xAB, op.isREG(16|i32e) && op.getBit() == reg.getBit()); } +void bts(const Operand& op, uint8_t imm) { opRext(op, 16|i32e, 5, T_0F, 0xba, false, 1); db(imm); } +void bzhi(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opRRO(r1, r2, op, T_APX|T_0F38|T_NF, 0xf5); } +void cbw() { db(0x66); db(0x98); } +void ccmpa(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 7); } +void ccmpa(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 7); } +void ccmpae(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 3); } +void ccmpae(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 3); } +void ccmpb(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 2); } +void ccmpb(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 2); } +void ccmpbe(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 6); } +void ccmpbe(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 6); } +void ccmpc(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 2); } +void ccmpc(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 2); } +void ccmpe(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 4); } +void ccmpe(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 4); } +void ccmpf(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 11); } +void ccmpf(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 11); } +void ccmpg(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 15); } +void ccmpg(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 15); } +void ccmpge(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 13); } +void ccmpge(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 13); } +void ccmpl(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 12); } +void ccmpl(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 12); } +void ccmple(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 14); } +void ccmple(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 14); } +void ccmpna(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 6); } +void ccmpna(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 6); } +void ccmpnae(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 2); } +void ccmpnae(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 2); } +void ccmpnb(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 3); } +void ccmpnb(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 3); } +void ccmpnbe(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 7); } +void ccmpnbe(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 7); } +void ccmpnc(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 3); } +void ccmpnc(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 3); } +void ccmpne(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 5); } +void ccmpne(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 5); } +void ccmpng(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 14); } +void ccmpng(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 14); } +void ccmpnge(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 12); } +void ccmpnge(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 12); } +void ccmpnl(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 13); } +void ccmpnl(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 13); } +void ccmpnle(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 15); } +void ccmpnle(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 15); } +void ccmpno(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 1); } +void ccmpno(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 1); } +void ccmpns(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 9); } +void ccmpns(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 9); } +void ccmpnz(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 5); } +void ccmpnz(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 5); } +void ccmpo(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 0); } +void ccmpo(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 0); } +void ccmps(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 8); } +void ccmps(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 8); } +void ccmpt(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 10); } +void ccmpt(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 10); } +void ccmpz(const Operand& op, int imm, int dfv = 0) { opCcmpi(op, imm, dfv, 4); } +void ccmpz(const Operand& op1, const Operand& op2, int dfv = 0) { opCcmp(op1, op2, dfv, 0x38, 4); } +void cdq() { db(0x99); } +void cfcmovb(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x42); } +void cfcmovb(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x42); } +void cfcmovbe(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x46); } +void cfcmovbe(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x46); } +void cfcmovl(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x4C); } +void cfcmovl(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x4C); } +void cfcmovle(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x4E); } +void cfcmovle(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x4E); } +void cfcmovnb(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x43); } +void cfcmovnb(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x43); } +void cfcmovnbe(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x47); } +void cfcmovnbe(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x47); } +void cfcmovnl(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x4D); } +void cfcmovnl(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x4D); } +void cfcmovnle(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x4F); } +void cfcmovnle(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x4F); } +void cfcmovno(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x41); } +void cfcmovno(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x41); } +void cfcmovnp(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x4B); } +void cfcmovnp(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x4B); } +void cfcmovns(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x49); } +void cfcmovns(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x49); } +void cfcmovnz(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x45); } +void cfcmovnz(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x45); } +void cfcmovo(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x40); } +void cfcmovo(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x40); } +void cfcmovp(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x4A); } +void cfcmovp(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x4A); } +void cfcmovs(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x48); } +void cfcmovs(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x48); } +void cfcmovz(const Operand& op1, const Operand& op2) { opCfcmov(Reg(), op1, op2, 0x44); } +void cfcmovz(const Reg& d, const Reg& r, const Operand& op) { opCfcmov(d|T_nf, op, r, 0x44); } +void clc() { db(0xF8); } +void cld() { db(0xFC); } +void cldemote(const Address& addr) { opMR(addr, eax, T_0F, 0x1C); } +void clflush(const Address& addr) { opMR(addr, Reg32(7), T_0F, 0xAE); } +void clflushopt(const Address& addr) { opMR(addr, Reg32(7), T_66 | T_0F, 0xAE); } +void cli() { db(0xFA); } +void clwb(const Address& addr) { opMR(addr, esi, T_66 | T_0F, 0xAE); } +void clzero() { db(0x0F); db(0x01); db(0xFC); } +void cmc() { db(0xF5); } +void cmova(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 7); }//-V524 +void cmova(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 7, op.isREG(16|i32e)); }//-V524 +void cmovae(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 3); }//-V524 +void cmovae(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 3, op.isREG(16|i32e)); }//-V524 +void cmovb(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 2); }//-V524 +void cmovb(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 2, op.isREG(16|i32e)); }//-V524 +void cmovbe(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 6); }//-V524 +void cmovbe(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 6, op.isREG(16|i32e)); }//-V524 +void cmovc(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 2); }//-V524 +void cmovc(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 2, op.isREG(16|i32e)); }//-V524 +void cmove(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 4); }//-V524 +void cmove(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 4, op.isREG(16|i32e)); }//-V524 +void cmovg(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 15); }//-V524 +void cmovg(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 15, op.isREG(16|i32e)); }//-V524 +void cmovge(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 13); }//-V524 +void cmovge(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 13, op.isREG(16|i32e)); }//-V524 +void cmovl(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 12); }//-V524 +void cmovl(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 12, op.isREG(16|i32e)); }//-V524 +void cmovle(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 14); }//-V524 +void cmovle(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 14, op.isREG(16|i32e)); }//-V524 +void cmovna(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 6); }//-V524 +void cmovna(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 6, op.isREG(16|i32e)); }//-V524 +void cmovnae(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 2); }//-V524 +void cmovnae(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 2, op.isREG(16|i32e)); }//-V524 +void cmovnb(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 3); }//-V524 +void cmovnb(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 3, op.isREG(16|i32e)); }//-V524 +void cmovnbe(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 7); }//-V524 +void cmovnbe(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 7, op.isREG(16|i32e)); }//-V524 +void cmovnc(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 3); }//-V524 +void cmovnc(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 3, op.isREG(16|i32e)); }//-V524 +void cmovne(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 5); }//-V524 +void cmovne(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 5, op.isREG(16|i32e)); }//-V524 +void cmovng(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 14); }//-V524 +void cmovng(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 14, op.isREG(16|i32e)); }//-V524 +void cmovnge(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 12); }//-V524 +void cmovnge(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 12, op.isREG(16|i32e)); }//-V524 +void cmovnl(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 13); }//-V524 +void cmovnl(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 13, op.isREG(16|i32e)); }//-V524 +void cmovnle(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 15); }//-V524 +void cmovnle(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 15, op.isREG(16|i32e)); }//-V524 +void cmovno(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 1); }//-V524 +void cmovno(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 1, op.isREG(16|i32e)); }//-V524 +void cmovnp(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 11); }//-V524 +void cmovnp(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 11, op.isREG(16|i32e)); }//-V524 +void cmovns(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 9); }//-V524 +void cmovns(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 9, op.isREG(16|i32e)); }//-V524 +void cmovnz(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 5); }//-V524 +void cmovnz(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 5, op.isREG(16|i32e)); }//-V524 +void cmovo(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 0); }//-V524 +void cmovo(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 0, op.isREG(16|i32e)); }//-V524 +void cmovp(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 10); }//-V524 +void cmovp(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 10, op.isREG(16|i32e)); }//-V524 +void cmovpe(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 10); }//-V524 +void cmovpe(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 10, op.isREG(16|i32e)); }//-V524 +void cmovpo(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 11); }//-V524 +void cmovpo(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 11, op.isREG(16|i32e)); }//-V524 +void cmovs(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 8); }//-V524 +void cmovs(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 8, op.isREG(16|i32e)); }//-V524 +void cmovz(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1, 0x40 | 4); }//-V524 +void cmovz(const Reg& reg, const Operand& op) { opRO(reg, op, T_0F, 0x40 | 4, op.isREG(16|i32e)); }//-V524 +void cmp(const Operand& op, uint32_t imm) { opOI(op, imm, 0x38, 7); } +void cmp(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x38); } +void cmpeqpd(const Xmm& x, const Operand& op) { cmppd(x, op, 0); } +void cmpeqps(const Xmm& x, const Operand& op) { cmpps(x, op, 0); } +void cmpeqsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 0); } +void cmpeqss(const Xmm& x, const Operand& op) { cmpss(x, op, 0); } +void cmplepd(const Xmm& x, const Operand& op) { cmppd(x, op, 2); } +void cmpleps(const Xmm& x, const Operand& op) { cmpps(x, op, 2); } +void cmplesd(const Xmm& x, const Operand& op) { cmpsd(x, op, 2); } +void cmpless(const Xmm& x, const Operand& op) { cmpss(x, op, 2); } +void cmpltpd(const Xmm& x, const Operand& op) { cmppd(x, op, 1); } +void cmpltps(const Xmm& x, const Operand& op) { cmpps(x, op, 1); } +void cmpltsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 1); } +void cmpltss(const Xmm& x, const Operand& op) { cmpss(x, op, 1); } +void cmpneqpd(const Xmm& x, const Operand& op) { cmppd(x, op, 4); } +void cmpneqps(const Xmm& x, const Operand& op) { cmpps(x, op, 4); } +void cmpneqsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 4); } +void cmpneqss(const Xmm& x, const Operand& op) { cmpss(x, op, 4); } +void cmpnlepd(const Xmm& x, const Operand& op) { cmppd(x, op, 6); } +void cmpnleps(const Xmm& x, const Operand& op) { cmpps(x, op, 6); } +void cmpnlesd(const Xmm& x, const Operand& op) { cmpsd(x, op, 6); } +void cmpnless(const Xmm& x, const Operand& op) { cmpss(x, op, 6); } +void cmpnltpd(const Xmm& x, const Operand& op) { cmppd(x, op, 5); } +void cmpnltps(const Xmm& x, const Operand& op) { cmpps(x, op, 5); } +void cmpnltsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 5); } +void cmpnltss(const Xmm& x, const Operand& op) { cmpss(x, op, 5); } +void cmpordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 7); } +void cmpordps(const Xmm& x, const Operand& op) { cmpps(x, op, 7); } +void cmpordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 7); } +void cmpordss(const Xmm& x, const Operand& op) { cmpss(x, op, 7); } +void cmppd(const Xmm& xmm, const Operand& op, uint8_t imm8) { opSSE(xmm, op, T_0F | T_66, 0xC2, isXMM_XMMorMEM, imm8); } +void cmpps(const Xmm& xmm, const Operand& op, uint8_t imm8) { opSSE(xmm, op, T_0F, 0xC2, isXMM_XMMorMEM, imm8); } +void cmpsb() { db(0xA6); } +void cmpsd() { db(0xA7); } +void cmpsd(const Xmm& xmm, const Operand& op, uint8_t imm8) { opSSE(xmm, op, T_0F | T_F2, 0xC2, isXMM_XMMorMEM, imm8); } +void cmpss(const Xmm& xmm, const Operand& op, uint8_t imm8) { opSSE(xmm, op, T_0F | T_F3, 0xC2, isXMM_XMMorMEM, imm8); } +void cmpsw() { db(0x66); db(0xA7); } +void cmpunordpd(const Xmm& x, const Operand& op) { cmppd(x, op, 3); } +void cmpunordps(const Xmm& x, const Operand& op) { cmpps(x, op, 3); } +void cmpunordsd(const Xmm& x, const Operand& op) { cmpsd(x, op, 3); } +void cmpunordss(const Xmm& x, const Operand& op) { cmpss(x, op, 3); } +void cmpxchg(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xB0 | (reg.isBit(8) ? 0 : 1), op.getBit() == reg.getBit()); } +void cmpxchg8b(const Address& addr) { opMR(addr, Reg32(1), T_0F, 0xC7); } +void comisd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F, 0x2F, isXMM_XMMorMEM); } +void comiss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x2F, isXMM_XMMorMEM); } +void cpuid() { db(0x0F); db(0xA2); } +void crc32(const Reg32e& r, const Operand& op) { if (!((r.isBit(32) && op.isBit(8|16|32)) || (r.isBit(64) && op.isBit(8|64)))) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) int code = 0xF0 | (op.isBit(8) ? 0 : 1); uint64_t type = op.isBit(16) ? T_66:0; type |= T_ALLOW_DIFF_SIZE; if (opROO(Reg(), op, static_cast(r), T_APX|type, code)) return; opRO(r, op, T_F2|T_0F38|type, code); } +void ctesta(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 7); } +void ctesta(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 7); } +void ctestae(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 3); } +void ctestae(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 3); } +void ctestb(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 2); } +void ctestb(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 2); } +void ctestbe(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 6); } +void ctestbe(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 6); } +void ctestc(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 2); } +void ctestc(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 2); } +void cteste(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 4); } +void cteste(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 4); } +void ctestf(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 11); } +void ctestf(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 11); } +void ctestg(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 15); } +void ctestg(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 15); } +void ctestge(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 13); } +void ctestge(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 13); } +void ctestl(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 12); } +void ctestl(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 12); } +void ctestle(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 14); } +void ctestle(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 14); } +void ctestna(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 6); } +void ctestna(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 6); } +void ctestnae(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 2); } +void ctestnae(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 2); } +void ctestnb(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 3); } +void ctestnb(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 3); } +void ctestnbe(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 7); } +void ctestnbe(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 7); } +void ctestnc(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 3); } +void ctestnc(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 3); } +void ctestne(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 5); } +void ctestne(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 5); } +void ctestng(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 14); } +void ctestng(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 14); } +void ctestnge(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 12); } +void ctestnge(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 12); } +void ctestnl(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 13); } +void ctestnl(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 13); } +void ctestnle(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 15); } +void ctestnle(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 15); } +void ctestno(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 1); } +void ctestno(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 1); } +void ctestns(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 9); } +void ctestns(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 9); } +void ctestnz(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 5); } +void ctestnz(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 5); } +void ctesto(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 0); } +void ctesto(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 0); } +void ctests(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 8); } +void ctests(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 8); } +void ctestt(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 10); } +void ctestt(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 10); } +void ctestz(const Operand& op, const Reg& r, int dfv = 0) { opCcmp(op, r, dfv, 0x84, 4); } +void ctestz(const Operand& op, int imm, int dfv = 0) { opTesti(op, imm, dfv, 4); } +void cvtdq2pd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F3|T_0F, 0xE6, isXMM_XMMorMEM); } +void cvtdq2ps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x5B, isXMM_XMMorMEM); } +void cvtpd2dq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F2|T_0F, 0xE6, isXMM_XMMorMEM); } +void cvtpd2pi(const Reg& reg, const Operand& op) { opSSE(reg, op, T_66|T_0F, 0x2D, isMMX_XMMorMEM); } +void cvtpd2ps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F, 0x5A, isXMM_XMMorMEM); } +void cvtpi2pd(const Reg& reg, const Operand& op) { opSSE(reg, op, T_66|T_0F, 0x2A, isXMM_MMXorMEM); } +void cvtpi2ps(const Reg& reg, const Operand& op) { opSSE(reg, op, T_0F, 0x2A, isXMM_MMXorMEM); } +void cvtps2dq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F, 0x5B, isXMM_XMMorMEM); } +void cvtps2pd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x5A, isXMM_XMMorMEM); } +void cvtps2pi(const Reg& reg, const Operand& op) { opSSE(reg, op, T_0F, 0x2D, isMMX_XMMorMEM); } +void cvtsd2si(const Reg& reg, const Operand& op) { opSSE(reg, op, T_F2|T_0F, 0x2D, isREG32_XMMorMEM); } +void cvtsd2ss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F2|T_0F, 0x5A, isXMM_XMMorMEM); } +void cvtsi2sd(const Reg& reg, const Operand& op) { opSSE(reg, op, T_F2|T_0F, 0x2A, isXMM_REG32orMEM); } +void cvtsi2ss(const Reg& reg, const Operand& op) { opSSE(reg, op, T_F3|T_0F, 0x2A, isXMM_REG32orMEM); } +void cvtss2sd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F3|T_0F, 0x5A, isXMM_XMMorMEM); } +void cvtss2si(const Reg& reg, const Operand& op) { opSSE(reg, op, T_F3|T_0F, 0x2D, isREG32_XMMorMEM); } +void cvttpd2dq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F, 0xE6, isXMM_XMMorMEM); } +void cvttpd2pi(const Reg& reg, const Operand& op) { opSSE(reg, op, T_66|T_0F, 0x2C, isMMX_XMMorMEM); } +void cvttps2dq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F3|T_0F, 0x5B, isXMM_XMMorMEM); } +void cvttps2pi(const Reg& reg, const Operand& op) { opSSE(reg, op, T_0F, 0x2C, isMMX_XMMorMEM); } +void cvttsd2si(const Reg& reg, const Operand& op) { opSSE(reg, op, T_F2|T_0F, 0x2C, isREG32_XMMorMEM); } +void cvttss2si(const Reg& reg, const Operand& op) { opSSE(reg, op, T_F3|T_0F, 0x2C, isREG32_XMMorMEM); } +void cwd() { db(0x66); db(0x99); } +void cwde() { db(0x98); } +void dec(const Operand& op) { opIncDec(Reg(), op, 1); } +void dec(const Reg& d, const Operand& op) { opIncDec(d, op, 1); } +void div(const Operand& op) { opRext(op, 0, 6, T_APX|T_NF|T_CODE1_IF1, 0xF6); } +void divpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x5E, isXMM_XMMorMEM); } +void divps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x5E, isXMM_XMMorMEM); } +void divsd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F2, 0x5E, isXMM_XMMorMEM); } +void divss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F3, 0x5E, isXMM_XMMorMEM); } +void dppd(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x41, isXMM_XMMorMEM, static_cast(imm)); } +void dpps(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x40, isXMM_XMMorMEM, static_cast(imm)); } +void emms() { db(0x0F); db(0x77); } +void endbr32() { db(0xF3); db(0x0F); db(0x1E); db(0xFB); } +void endbr64() { db(0xF3); db(0x0F); db(0x1E); db(0xFA); } +void enter(uint16_t x, uint8_t y) { db(0xC8); dw(x); db(y); } +void extractps(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x17, imm); } +void f2xm1() { db(0xD9); db(0xF0); } +void fabs() { db(0xD9); db(0xE1); } +void fadd(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 0, 0); } +void fadd(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8C0, 0xDCC0); } +void fadd(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C0, 0xDCC0); } +void faddp() { db(0xDE); db(0xC1); } +void faddp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC0); } +void faddp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC0); } +void fbld(const Address& addr) { opMR(addr, Reg32(4), 0, 0xDF); } +void fbstp(const Address& addr) { opMR(addr, Reg32(6), 0, 0xDF); } +void fchs() { db(0xD9); db(0xE0); } +void fclex() { db(0x9B); db(0xDB); db(0xE2); } +void fcmovb(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAC0, 0x00C0); } +void fcmovb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC0, 0x00C0); } +void fcmovbe(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAD0, 0x00D0); } +void fcmovbe(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAD0, 0x00D0); } +void fcmove(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAC8, 0x00C8); } +void fcmove(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAC8, 0x00C8); } +void fcmovnb(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBC0, 0x00C0); } +void fcmovnb(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBC0, 0x00C0); } +void fcmovnbe(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBD0, 0x00D0); } +void fcmovnbe(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBD0, 0x00D0); } +void fcmovne(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBC8, 0x00C8); } +void fcmovne(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBC8, 0x00C8); } +void fcmovnu(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBD8, 0x00D8); } +void fcmovnu(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBD8, 0x00D8); } +void fcmovu(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDAD8, 0x00D8); } +void fcmovu(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDAD8, 0x00D8); } +void fcom() { db(0xD8); db(0xD1); } +void fcom(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 2, 0); } +void fcom(const Fpu& reg) { opFpu(reg, 0xD8, 0xD0); } +void fcomi(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBF0, 0x00F0); } +void fcomi(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBF0, 0x00F0); } +void fcomip(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDFF0, 0x00F0); } +void fcomip(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDFF0, 0x00F0); } +void fcomp() { db(0xD8); db(0xD9); } +void fcomp(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 3, 0); } +void fcomp(const Fpu& reg) { opFpu(reg, 0xD8, 0xD8); } +void fcompp() { db(0xDE); db(0xD9); } +void fcos() { db(0xD9); db(0xFF); } +void fdecstp() { db(0xD9); db(0xF6); } +void fdiv(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 6, 0); } +void fdiv(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8F0, 0xDCF8); } +void fdiv(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8F0, 0xDCF8); } +void fdivp() { db(0xDE); db(0xF9); } +void fdivp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEF8); } +void fdivp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEF8); } +void fdivr(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 7, 0); } +void fdivr(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8F8, 0xDCF0); } +void fdivr(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8F8, 0xDCF0); } +void fdivrp() { db(0xDE); db(0xF1); } +void fdivrp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEF0); } +void fdivrp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEF0); } +void ffree(const Fpu& reg) { opFpu(reg, 0xDD, 0xC0); } +void fiadd(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 0, 0); } +void ficom(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 2, 0); } +void ficomp(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 3, 0); } +void fidiv(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 6, 0); } +void fidivr(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 7, 0); } +void fild(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0xDF, 0, 5); } +void fimul(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 1, 0); } +void fincstp() { db(0xD9); db(0xF7); } +void finit() { db(0x9B); db(0xDB); db(0xE3); } +void fist(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0x00, 2, 0); } +void fistp(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0xDF, 3, 7); } +void fisttp(const Address& addr) { opFpuMem(addr, 0xDF, 0xDB, 0xDD, 1, 0); } +void fisub(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 4, 0); } +void fisubr(const Address& addr) { opFpuMem(addr, 0xDE, 0xDA, 0x00, 5, 0); } +void fld(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 0, 0); } +void fld(const Fpu& reg) { opFpu(reg, 0xD9, 0xC0); } +void fld1() { db(0xD9); db(0xE8); } +void fldcw(const Address& addr) { opMR(addr, Reg32(5), 0, 0xD9); } +void fldenv(const Address& addr) { opMR(addr, Reg32(4), 0, 0xD9); } +void fldl2e() { db(0xD9); db(0xEA); } +void fldl2t() { db(0xD9); db(0xE9); } +void fldlg2() { db(0xD9); db(0xEC); } +void fldln2() { db(0xD9); db(0xED); } +void fldpi() { db(0xD9); db(0xEB); } +void fldz() { db(0xD9); db(0xEE); } +void fmul(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 1, 0); } +void fmul(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8C8, 0xDCC8); } +void fmul(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8C8, 0xDCC8); } +void fmulp() { db(0xDE); db(0xC9); } +void fmulp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEC8); } +void fmulp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEC8); } +void fnclex() { db(0xDB); db(0xE2); } +void fninit() { db(0xDB); db(0xE3); } +void fnop() { db(0xD9); db(0xD0); } +void fnsave(const Address& addr) { opMR(addr, Reg32(6), 0, 0xDD); } +void fnstcw(const Address& addr) { opMR(addr, Reg32(7), 0, 0xD9); } +void fnstenv(const Address& addr) { opMR(addr, Reg32(6), 0, 0xD9); } +void fnstsw(const Address& addr) { opMR(addr, Reg32(7), 0, 0xDD); } +void fnstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0xDF); db(0xE0); } +void fpatan() { db(0xD9); db(0xF3); } +void fprem() { db(0xD9); db(0xF8); } +void fprem1() { db(0xD9); db(0xF5); } +void fptan() { db(0xD9); db(0xF2); } +void frndint() { db(0xD9); db(0xFC); } +void frstor(const Address& addr) { opMR(addr, Reg32(4), 0, 0xDD); } +void fsave(const Address& addr) { db(0x9B); opMR(addr, Reg32(6), 0, 0xDD); } +void fscale() { db(0xD9); db(0xFD); } +void fsin() { db(0xD9); db(0xFE); } +void fsincos() { db(0xD9); db(0xFB); } +void fsqrt() { db(0xD9); db(0xFA); } +void fst(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 2, 0); } +void fst(const Fpu& reg) { opFpu(reg, 0xDD, 0xD0); } +void fstcw(const Address& addr) { db(0x9B); opMR(addr, Reg32(7), 0, 0xD9); } +void fstenv(const Address& addr) { db(0x9B); opMR(addr, Reg32(6), 0, 0xD9); } +void fstp(const Address& addr) { opFpuMem(addr, 0x00, 0xD9, 0xDD, 3, 0); } +void fstp(const Fpu& reg) { opFpu(reg, 0xDD, 0xD8); } +void fstsw(const Address& addr) { db(0x9B); opMR(addr, Reg32(7), 0, 0xDD); } +void fstsw(const Reg16& r) { if (r.getIdx() != Operand::AX) XBYAK_THROW(ERR_BAD_PARAMETER) db(0x9B); db(0xDF); db(0xE0); } +void fsub(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 4, 0); } +void fsub(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8E0, 0xDCE8); } +void fsub(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E0, 0xDCE8); } +void fsubp() { db(0xDE); db(0xE9); } +void fsubp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEE8); } +void fsubp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEE8); } +void fsubr(const Address& addr) { opFpuMem(addr, 0x00, 0xD8, 0xDC, 5, 0); } +void fsubr(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xD8E8, 0xDCE0); } +void fsubr(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xD8E8, 0xDCE0); } +void fsubrp() { db(0xDE); db(0xE1); } +void fsubrp(const Fpu& reg1) { opFpuFpu(reg1, st0, 0x0000, 0xDEE0); } +void fsubrp(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0x0000, 0xDEE0); } +void ftst() { db(0xD9); db(0xE4); } +void fucom() { db(0xDD); db(0xE1); } +void fucom(const Fpu& reg) { opFpu(reg, 0xDD, 0xE0); } +void fucomi(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDBE8, 0x00E8); } +void fucomi(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDBE8, 0x00E8); } +void fucomip(const Fpu& reg1) { opFpuFpu(st0, reg1, 0xDFE8, 0x00E8); } +void fucomip(const Fpu& reg1, const Fpu& reg2) { opFpuFpu(reg1, reg2, 0xDFE8, 0x00E8); } +void fucomp() { db(0xDD); db(0xE9); } +void fucomp(const Fpu& reg) { opFpu(reg, 0xDD, 0xE8); } +void fucompp() { db(0xDA); db(0xE9); } +void fwait() { db(0x9B); } +void fxam() { db(0xD9); db(0xE5); } +void fxch() { db(0xD9); db(0xC9); } +void fxch(const Fpu& reg) { opFpu(reg, 0xD9, 0xC8); } +void fxrstor(const Address& addr) { opMR(addr, Reg32(1), T_0F, 0xAE); } +void fxtract() { db(0xD9); db(0xF4); } +void fyl2x() { db(0xD9); db(0xF1); } +void fyl2xp1() { db(0xD9); db(0xF9); } +void gf2p8affineinvqb(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0xCF, isXMM_XMMorMEM, static_cast(imm)); } +void gf2p8affineqb(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0xCE, isXMM_XMMorMEM, static_cast(imm)); } +void gf2p8mulb(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0xCF, isXMM_XMMorMEM); } +void haddpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F|T_YMM, 0x7C, isXMM_XMMorMEM); } +void haddps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F2|T_0F|T_YMM, 0x7C, isXMM_XMMorMEM); } +void hlt() { db(0xF4); } +void hsubpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F|T_YMM, 0x7D, isXMM_XMMorMEM); } +void hsubps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F2|T_0F|T_YMM, 0x7D, isXMM_XMMorMEM); } +void idiv(const Operand& op) { opRext(op, 0, 7, T_APX|T_NF|T_CODE1_IF1, 0xF6); } +void imul(const Operand& op) { opRext(op, 0, 5, T_APX|T_NF|T_CODE1_IF1, 0xF6); } +void imul(const Reg& d, const Reg& reg, const Operand& op) { opROO(d, op, reg, T_APX|T_ND1|T_NF, 0xAF); } +void imul(const Reg& reg, const Operand& op) { if (opROO(Reg(), op, reg, T_APX|T_NF, 0xAF)) return; opRO(reg, op, T_0F, 0xAF, reg.getKind() == op.getKind()); } +void in_(const Reg& a, const Reg& d) { opInOut(a, d, 0xEC); } +void in_(const Reg& a, uint8_t v) { opInOut(a, 0xE4, v); } +void inc(const Operand& op) { opIncDec(Reg(), op, 0); } +void inc(const Reg& d, const Operand& op) { opIncDec(d, op, 0); } +void insertps(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x21, isXMM_XMMorMEM, imm); } +void int3() { db(0xCC); } +void int_(uint8_t x) { db(0xCD); db(x); } +void ja(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }//-V524 +void ja(const char *label, LabelType type = T_AUTO) { ja(std::string(label), type); }//-V524 +void ja(const void *addr) { opJmpAbs(addr, T_NEAR, 0x77, 0x87, 0x0F); }//-V524 +void ja(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }//-V524 +void jae(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); }//-V524 +void jae(const char *label, LabelType type = T_AUTO) { jae(std::string(label), type); }//-V524 +void jae(const void *addr) { opJmpAbs(addr, T_NEAR, 0x73, 0x83, 0x0F); }//-V524 +void jae(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); }//-V524 +void jb(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); }//-V524 +void jb(const char *label, LabelType type = T_AUTO) { jb(std::string(label), type); }//-V524 +void jb(const void *addr) { opJmpAbs(addr, T_NEAR, 0x72, 0x82, 0x0F); }//-V524 +void jb(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); }//-V524 +void jbe(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); }//-V524 +void jbe(const char *label, LabelType type = T_AUTO) { jbe(std::string(label), type); }//-V524 +void jbe(const void *addr) { opJmpAbs(addr, T_NEAR, 0x76, 0x86, 0x0F); }//-V524 +void jbe(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); }//-V524 +void jc(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); }//-V524 +void jc(const char *label, LabelType type = T_AUTO) { jc(std::string(label), type); }//-V524 +void jc(const void *addr) { opJmpAbs(addr, T_NEAR, 0x72, 0x82, 0x0F); }//-V524 +void jc(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); }//-V524 +void je(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); }//-V524 +void je(const char *label, LabelType type = T_AUTO) { je(std::string(label), type); }//-V524 +void je(const void *addr) { opJmpAbs(addr, T_NEAR, 0x74, 0x84, 0x0F); }//-V524 +void je(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); }//-V524 +void jg(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); }//-V524 +void jg(const char *label, LabelType type = T_AUTO) { jg(std::string(label), type); }//-V524 +void jg(const void *addr) { opJmpAbs(addr, T_NEAR, 0x7F, 0x8F, 0x0F); }//-V524 +void jg(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); }//-V524 +void jge(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); }//-V524 +void jge(const char *label, LabelType type = T_AUTO) { jge(std::string(label), type); }//-V524 +void jge(const void *addr) { opJmpAbs(addr, T_NEAR, 0x7D, 0x8D, 0x0F); }//-V524 +void jge(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); }//-V524 +void jl(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); }//-V524 +void jl(const char *label, LabelType type = T_AUTO) { jl(std::string(label), type); }//-V524 +void jl(const void *addr) { opJmpAbs(addr, T_NEAR, 0x7C, 0x8C, 0x0F); }//-V524 +void jl(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); }//-V524 +void jle(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); }//-V524 +void jle(const char *label, LabelType type = T_AUTO) { jle(std::string(label), type); }//-V524 +void jle(const void *addr) { opJmpAbs(addr, T_NEAR, 0x7E, 0x8E, 0x0F); }//-V524 +void jle(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); }//-V524 +void jna(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); }//-V524 +void jna(const char *label, LabelType type = T_AUTO) { jna(std::string(label), type); }//-V524 +void jna(const void *addr) { opJmpAbs(addr, T_NEAR, 0x76, 0x86, 0x0F); }//-V524 +void jna(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x76, 0x86, 0x0F); }//-V524 +void jnae(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); }//-V524 +void jnae(const char *label, LabelType type = T_AUTO) { jnae(std::string(label), type); }//-V524 +void jnae(const void *addr) { opJmpAbs(addr, T_NEAR, 0x72, 0x82, 0x0F); }//-V524 +void jnae(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x72, 0x82, 0x0F); }//-V524 +void jnb(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); }//-V524 +void jnb(const char *label, LabelType type = T_AUTO) { jnb(std::string(label), type); }//-V524 +void jnb(const void *addr) { opJmpAbs(addr, T_NEAR, 0x73, 0x83, 0x0F); }//-V524 +void jnb(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); }//-V524 +void jnbe(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }//-V524 +void jnbe(const char *label, LabelType type = T_AUTO) { jnbe(std::string(label), type); }//-V524 +void jnbe(const void *addr) { opJmpAbs(addr, T_NEAR, 0x77, 0x87, 0x0F); }//-V524 +void jnbe(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x77, 0x87, 0x0F); }//-V524 +void jnc(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); }//-V524 +void jnc(const char *label, LabelType type = T_AUTO) { jnc(std::string(label), type); }//-V524 +void jnc(const void *addr) { opJmpAbs(addr, T_NEAR, 0x73, 0x83, 0x0F); }//-V524 +void jnc(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x73, 0x83, 0x0F); }//-V524 +void jne(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); }//-V524 +void jne(const char *label, LabelType type = T_AUTO) { jne(std::string(label), type); }//-V524 +void jne(const void *addr) { opJmpAbs(addr, T_NEAR, 0x75, 0x85, 0x0F); }//-V524 +void jne(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); }//-V524 +void jng(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); }//-V524 +void jng(const char *label, LabelType type = T_AUTO) { jng(std::string(label), type); }//-V524 +void jng(const void *addr) { opJmpAbs(addr, T_NEAR, 0x7E, 0x8E, 0x0F); }//-V524 +void jng(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7E, 0x8E, 0x0F); }//-V524 +void jnge(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); }//-V524 +void jnge(const char *label, LabelType type = T_AUTO) { jnge(std::string(label), type); }//-V524 +void jnge(const void *addr) { opJmpAbs(addr, T_NEAR, 0x7C, 0x8C, 0x0F); }//-V524 +void jnge(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7C, 0x8C, 0x0F); }//-V524 +void jnl(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); }//-V524 +void jnl(const char *label, LabelType type = T_AUTO) { jnl(std::string(label), type); }//-V524 +void jnl(const void *addr) { opJmpAbs(addr, T_NEAR, 0x7D, 0x8D, 0x0F); }//-V524 +void jnl(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7D, 0x8D, 0x0F); }//-V524 +void jnle(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); }//-V524 +void jnle(const char *label, LabelType type = T_AUTO) { jnle(std::string(label), type); }//-V524 +void jnle(const void *addr) { opJmpAbs(addr, T_NEAR, 0x7F, 0x8F, 0x0F); }//-V524 +void jnle(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7F, 0x8F, 0x0F); }//-V524 +void jno(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x71, 0x81, 0x0F); }//-V524 +void jno(const char *label, LabelType type = T_AUTO) { jno(std::string(label), type); }//-V524 +void jno(const void *addr) { opJmpAbs(addr, T_NEAR, 0x71, 0x81, 0x0F); }//-V524 +void jno(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x71, 0x81, 0x0F); }//-V524 +void jnp(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); }//-V524 +void jnp(const char *label, LabelType type = T_AUTO) { jnp(std::string(label), type); }//-V524 +void jnp(const void *addr) { opJmpAbs(addr, T_NEAR, 0x7B, 0x8B, 0x0F); }//-V524 +void jnp(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); }//-V524 +void jns(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x79, 0x89, 0x0F); }//-V524 +void jns(const char *label, LabelType type = T_AUTO) { jns(std::string(label), type); }//-V524 +void jns(const void *addr) { opJmpAbs(addr, T_NEAR, 0x79, 0x89, 0x0F); }//-V524 +void jns(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x79, 0x89, 0x0F); }//-V524 +void jnz(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); }//-V524 +void jnz(const char *label, LabelType type = T_AUTO) { jnz(std::string(label), type); }//-V524 +void jnz(const void *addr) { opJmpAbs(addr, T_NEAR, 0x75, 0x85, 0x0F); }//-V524 +void jnz(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x75, 0x85, 0x0F); }//-V524 +void jo(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x70, 0x80, 0x0F); }//-V524 +void jo(const char *label, LabelType type = T_AUTO) { jo(std::string(label), type); }//-V524 +void jo(const void *addr) { opJmpAbs(addr, T_NEAR, 0x70, 0x80, 0x0F); }//-V524 +void jo(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x70, 0x80, 0x0F); }//-V524 +void jp(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); }//-V524 +void jp(const char *label, LabelType type = T_AUTO) { jp(std::string(label), type); }//-V524 +void jp(const void *addr) { opJmpAbs(addr, T_NEAR, 0x7A, 0x8A, 0x0F); }//-V524 +void jp(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); }//-V524 +void jpe(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); }//-V524 +void jpe(const char *label, LabelType type = T_AUTO) { jpe(std::string(label), type); }//-V524 +void jpe(const void *addr) { opJmpAbs(addr, T_NEAR, 0x7A, 0x8A, 0x0F); }//-V524 +void jpe(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7A, 0x8A, 0x0F); }//-V524 +void jpo(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); }//-V524 +void jpo(const char *label, LabelType type = T_AUTO) { jpo(std::string(label), type); }//-V524 +void jpo(const void *addr) { opJmpAbs(addr, T_NEAR, 0x7B, 0x8B, 0x0F); }//-V524 +void jpo(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x7B, 0x8B, 0x0F); }//-V524 +void js(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x78, 0x88, 0x0F); }//-V524 +void js(const char *label, LabelType type = T_AUTO) { js(std::string(label), type); }//-V524 +void js(const void *addr) { opJmpAbs(addr, T_NEAR, 0x78, 0x88, 0x0F); }//-V524 +void js(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x78, 0x88, 0x0F); }//-V524 +void jz(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); }//-V524 +void jz(const char *label, LabelType type = T_AUTO) { jz(std::string(label), type); }//-V524 +void jz(const void *addr) { opJmpAbs(addr, T_NEAR, 0x74, 0x84, 0x0F); }//-V524 +void jz(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0x74, 0x84, 0x0F); }//-V524 +void lahf() { db(0x9F); } +void lddqu(const Xmm& xmm, const Address& addr) { opSSE(xmm, addr, T_F2 | T_0F, 0xF0); } +void ldmxcsr(const Address& addr) { opMR(addr, Reg32(2), T_0F, 0xAE); } +void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opMR(addr, reg, 0, 0x8D); } +void leave() { db(0xC9); } +void lfence() { db(0x0F); db(0xAE); db(0xE8); } +void lfs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, T_0F, 0xB4); } +void lgs(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, T_0F, 0xB5); } +void lock() { db(0xF0); } +void lodsb() { db(0xAC); } +void lodsd() { db(0xAD); } +void lodsw() { db(0x66); db(0xAD); } +void loop(const Label& label) { opJmp(label, T_SHORT, 0xE2, 0, 0); } +void loop(const char *label) { loop(std::string(label)); } +void loop(std::string label) { opJmp(label, T_SHORT, 0xE2, 0, 0); } +void loope(const Label& label) { opJmp(label, T_SHORT, 0xE1, 0, 0); } +void loope(const char *label) { loope(std::string(label)); } +void loope(std::string label) { opJmp(label, T_SHORT, 0xE1, 0, 0); } +void loopne(const Label& label) { opJmp(label, T_SHORT, 0xE0, 0, 0); } +void loopne(const char *label) { loopne(std::string(label)); } +void loopne(std::string label) { opJmp(label, T_SHORT, 0xE0, 0, 0); } +void lss(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, T_0F, 0xB2); } +void lzcnt(const Reg®, const Operand& op) { if (opROO(Reg(), op, reg, T_APX|T_NF, 0xF5)) return; opCnt(reg, op, 0xBD); } +void maskmovdqu(const Xmm& reg1, const Xmm& reg2) { opSSE(reg1, reg2, T_66|T_0F, 0xF7); } +void maskmovq(const Mmx& reg1, const Mmx& reg2) { opSSE(reg1, reg2, T_0F, 0xF7); } +void maxpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x5F, isXMM_XMMorMEM); } +void maxps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x5F, isXMM_XMMorMEM); } +void maxsd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F2, 0x5F, isXMM_XMMorMEM); } +void maxss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F3, 0x5F, isXMM_XMMorMEM); } +void mfence() { db(0x0F); db(0xAE); db(0xF0); } +void minpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x5D, isXMM_XMMorMEM); } +void minps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x5D, isXMM_XMMorMEM); } +void minsd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F2, 0x5D, isXMM_XMMorMEM); } +void minss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F3, 0x5D, isXMM_XMMorMEM); } +void monitor() { db(0x0F); db(0x01); db(0xC8); } +void monitorx() { db(0x0F); db(0x01); db(0xFA); } +void movapd(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_66, 0x29); } +void movapd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, T_0F, T_66); } +void movaps(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_NONE, 0x29); } +void movaps(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x28, T_0F, T_NONE); } +void movbe(const Address& addr, const Reg& reg) { opMR(addr, reg, T_0F38, 0xF1, T_APX, 0x61); } +void movbe(const Reg& reg, const Address& addr) { opMR(addr, reg, T_0F38, 0xF0, T_APX, 0x60); } +void movd(const Mmx& mmx, const Operand& op) { if (!(op.isMEM() || op.isREG(32))) XBYAK_THROW(ERR_BAD_COMBINATION) if (mmx.isXMM()) db(0x66); opSSE(mmx, op, T_0F | T_ALLOW_DIFF_SIZE, 0x6E); } +void movd(const Operand& op, const Mmx& mmx) { if (!(op.isMEM() || op.isREG(32))) XBYAK_THROW(ERR_BAD_COMBINATION) if (mmx.isXMM()) db(0x66); opSSE(mmx, op, T_0F | T_ALLOW_DIFF_SIZE, 0x7E); } +void movddup(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_DUP|T_F2|T_0F|T_EW1|T_YMM|T_EVEX, 0x12, isXMM_XMMorMEM, NONE); } +void movdir64b(const Reg& reg, const Address& addr) { opMR(addr, reg.cvt32(), T_66|T_0F38, 0xF8, T_APX|T_66); } +void movdiri(const Address& addr, const Reg32e& reg) { opMR(addr, reg, T_0F38, 0xF9, T_APX); } +void movdq2q(const Mmx& mmx, const Xmm& xmm) { opSSE(mmx, xmm, T_F2 | T_0F, 0xD6); } +void movdqa(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_66, 0x7F); } +void movdqa(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, T_0F, T_66); } +void movdqu(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_F3, 0x7F); } +void movdqu(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, T_0F, T_F3); } +void movhlps(const Xmm& reg1, const Xmm& reg2) { opSSE(reg1, reg2, T_0F, 0x12); } +void movhpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, T_66|T_0F, 0x16); } +void movhps(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, T_0F, 0x16); } +void movlhps(const Xmm& reg1, const Xmm& reg2) { opSSE(reg1, reg2, T_0F, 0x16); } +void movlpd(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, T_66|T_0F, 0x12); } +void movlps(const Operand& op1, const Operand& op2) { opMovXMM(op1, op2, T_0F, 0x12); } +void movmskpd(const Reg32e& reg, const Xmm& xmm) { db(0x66); movmskps(reg, xmm); } +void movmskps(const Reg32e& reg, const Xmm& xmm) { opSSE(reg, xmm, T_0F, 0x50); } +void movntdq(const Address& addr, const Xmm& reg) { if (reg.getIdx() >= 16) XBYAK_THROW(ERR_BAD_PARAMETER) opSSE(Reg16(reg.getIdx()), addr, T_0F, 0xE7); } +void movntdqa(const Xmm& xmm, const Address& addr) { opSSE(xmm, addr, T_66 | T_0F38, 0x2A); } +void movnti(const Address& addr, const Reg32e& reg) { opMR(addr, reg, T_0F, 0xC3); } +void movntpd(const Address& addr, const Xmm& reg) { if (reg.getIdx() >= 16) XBYAK_THROW(ERR_BAD_PARAMETER) opSSE(Reg16(reg.getIdx()), addr, T_0F, 0x2B); } +void movntps(const Address& addr, const Xmm& xmm) { opSSE(Xmm(xmm.getIdx()), addr, T_0F, 0x2B); } +void movntq(const Address& addr, const Mmx& mmx) { if (!mmx.isMMX()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(mmx, addr, T_0F, 0xE7); } +void movq(const Address& addr, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opSSE(mmx, addr, T_0F | T_ALLOW_DIFF_SIZE, mmx.isXMM() ? 0xD6 : 0x7F); } +void movq(const Mmx& mmx, const Operand& op) { if (!op.isMEM() && mmx.getKind() != op.getKind()) XBYAK_THROW(ERR_BAD_COMBINATION) if (mmx.isXMM()) db(0xF3); opSSE(mmx, op, T_0F | T_ALLOW_DIFF_SIZE, mmx.isXMM() ? 0x7E : 0x6F); } +void movq2dq(const Xmm& xmm, const Mmx& mmx) { opSSE(xmm, mmx, T_F3 | T_0F, 0xD6); } +void movsb() { db(0xA4); } +void movsd() { db(0xA5); } +void movsd(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_F2, 0x11); } +void movsd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, T_0F, T_F2); } +void movshdup(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F3|T_0F|T_W0|T_YMM|T_EVEX, 0x16, isXMM_XMMorMEM, NONE); } +void movsldup(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_F3|T_0F|T_W0|T_YMM|T_EVEX, 0x12, isXMM_XMMorMEM, NONE); } +void movss(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_F3, 0x11); } +void movss(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, T_0F, T_F3); } +void movsw() { db(0x66); db(0xA5); } +void movsx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xBE); } +void movupd(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_66, 0x11); } +void movupd(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, T_0F, T_66); } +void movups(const Address& addr, const Xmm& xmm) { opSSE(xmm, addr, T_0F|T_NONE, 0x11); } +void movups(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x10, T_0F, T_NONE); } +void movzx(const Reg& reg, const Operand& op) { opMovxx(reg, op, 0xB6); } +void mpsadbw(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x42, isXMM_XMMorMEM, static_cast(imm)); } +void mul(const Operand& op) { opRext(op, 0, 4, T_APX|T_NF|T_CODE1_IF1, 0xF6); } +void mulpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x59, isXMM_XMMorMEM); } +void mulps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x59, isXMM_XMMorMEM); } +void mulsd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F2, 0x59, isXMM_XMMorMEM); } +void mulss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F3, 0x59, isXMM_XMMorMEM); } +void mulx(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opRRO(r1, r2, op, T_APX|T_F2|T_0F38, 0xf6); } +void mwait() { db(0x0F); db(0x01); db(0xC9); } +void mwaitx() { db(0x0F); db(0x01); db(0xFB); } +void neg(const Operand& op) { opRext(op, 0, 3, T_APX|T_NF|T_CODE1_IF1, 0xF6); } +void neg(const Reg& d, const Operand& op) { opROO(d, op, Reg(3, Operand::REG, d.getBit()), T_APX|T_NF|T_CODE1_IF1|T_ND1, 0xF6); } +void not_(const Operand& op) { opRext(op, 0, 2, T_APX|T_CODE1_IF1, 0xF6); } +void not_(const Reg& d, const Operand& op) { opROO(d, op, Reg(2, Operand::REG, d.getBit()), T_APX|T_CODE1_IF1|T_ND1, 0xF6); } +void or_(const Operand& op, uint32_t imm) { opOI(op, imm, 0x08, 1); } +void or_(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x08); } +void or_(const Reg& d, const Operand& op, uint32_t imm) { opROI(d, op, imm, T_NF|T_CODE1_IF1, 1); } +void or_(const Reg& d, const Operand& op1, const Operand& op2) { opROO(d, op1, op2, T_NF|T_CODE1_IF1, 0x08); } +void orpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x56, isXMM_XMMorMEM); } +void orps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x56, isXMM_XMMorMEM); } +void out_(const Reg& d, const Reg& a) { opInOut(a, d, 0xEE); } +void out_(uint8_t v, const Reg& a) { opInOut(a, 0xE6, v); } +void outsb() { db(0x6E); } +void outsd() { db(0x6F); } +void outsw() { db(0x66); db(0x6F); } +void pabsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1C, T_0F38, T_66); } +void pabsd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1E, T_0F38, T_66); } +void pabsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x1D, T_0F38, T_66); } +void packssdw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6B); } +void packsswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x63); } +void packusdw(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x2B, isXMM_XMMorMEM); } +void packuswb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x67); } +void paddb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFC); } +void paddd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFE); } +void paddq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD4); } +void paddsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEC); } +void paddsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xED); } +void paddusb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDC); } +void paddusw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDD); } +void paddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFD); } +void palignr(const Mmx& mmx, const Operand& op, int imm) { opMMX(mmx, op, 0x0F, T_0F3A, T_66, static_cast(imm)); } +void pand(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDB); } +void pandn(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDF); } +void pause() { db(0xF3); db(0x90); } +void pavgb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE0); } +void pavgw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE3); } +void pblendvb(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F38, 0x10, isXMM_XMMorMEM, NONE); } +void pblendw(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x0E, isXMM_XMMorMEM, static_cast(imm)); } +void pclmulhqhqdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x11); } +void pclmulhqlqdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x01); } +void pclmullqhqdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x10); } +void pclmullqlqdq(const Xmm& xmm, const Operand& op) { pclmulqdq(xmm, op, 0x00); } +void pclmulqdq(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x44, isXMM_XMMorMEM, static_cast(imm)); } +void pcmpeqb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x74); } +void pcmpeqd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x76); } +void pcmpeqq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x29, isXMM_XMMorMEM); } +void pcmpeqw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x75); } +void pcmpestri(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66|T_0F3A, 0x61, isXMM_XMMorMEM, imm); } +void pcmpestrm(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66|T_0F3A, 0x60, isXMM_XMMorMEM, imm); } +void pcmpgtb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x64); } +void pcmpgtd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x66); } +void pcmpgtq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x37, isXMM_XMMorMEM); } +void pcmpgtw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x65); } +void pcmpistri(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66|T_0F3A, 0x63, isXMM_XMMorMEM, imm); } +void pcmpistrm(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66|T_0F3A, 0x62, isXMM_XMMorMEM, imm); } +void pdep(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opRRO(r1, r2, op, T_APX|T_F2|T_0F38, 0xf5); } +void pext(const Reg32e& r1, const Reg32e& r2, const Operand& op) { opRRO(r1, r2, op, T_APX|T_F3|T_0F38, 0xf5); } +void pextrb(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x14, imm); } +void pextrd(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x16, imm); } +void pextrw(const Operand& op, const Mmx& xmm, uint8_t imm) { opExt(op, xmm, 0x15, imm, true); } +void phaddd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x02, T_0F38, T_66); } +void phaddsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x03, T_0F38, T_66); } +void phaddw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x01, T_0F38, T_66); } +void phminposuw(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F38, 0x41, isXMM_XMMorMEM, NONE); } +void phsubd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x06, T_0F38, T_66); } +void phsubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x07, T_0F38, T_66); } +void phsubw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x05, T_0F38, T_66); } +void pinsrb(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x20, isXMM_REG32orMEM, imm); } +void pinsrd(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x22, isXMM_REG32orMEM, imm); } +void pinsrw(const Mmx& mmx, const Operand& op, int imm) { if (!op.isREG(32) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(mmx, op, T_0F | (mmx.isXMM() ? T_66 : T_NONE), 0xC4, 0, imm); } +void pmaddubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x04, T_0F38, T_66); } +void pmaddwd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF5); } +void pmaxsb(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x3C, isXMM_XMMorMEM); } +void pmaxsd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x3D, isXMM_XMMorMEM); } +void pmaxsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEE); } +void pmaxub(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDE); } +void pmaxud(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x3F, isXMM_XMMorMEM); } +void pmaxuw(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x3E, isXMM_XMMorMEM); } +void pminsb(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x38, isXMM_XMMorMEM); } +void pminsd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x39, isXMM_XMMorMEM); } +void pminsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEA); } +void pminub(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xDA); } +void pminud(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x3B, isXMM_XMMorMEM); } +void pminuw(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x3A, isXMM_XMMorMEM); } +void pmovmskb(const Reg32e& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opSSE(reg, mmx, T_0F, 0xD7); } +void pmovsxbd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N4|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x21, isXMM_XMMorMEM, NONE); } +void pmovsxbq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N2|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x22, isXMM_XMMorMEM, NONE); } +void pmovsxbw(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N8|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x20, isXMM_XMMorMEM, NONE); } +void pmovsxdq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N8|T_N_VL|T_66|T_0F38|T_W0|T_YMM|T_EVEX, 0x25, isXMM_XMMorMEM, NONE); } +void pmovsxwd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N8|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x23, isXMM_XMMorMEM, NONE); } +void pmovsxwq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N4|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x24, isXMM_XMMorMEM, NONE); } +void pmovzxbd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N4|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x31, isXMM_XMMorMEM, NONE); } +void pmovzxbq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N2|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x32, isXMM_XMMorMEM, NONE); } +void pmovzxbw(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N8|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x30, isXMM_XMMorMEM, NONE); } +void pmovzxdq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N8|T_N_VL|T_66|T_0F38|T_W0|T_YMM|T_EVEX, 0x35, isXMM_XMMorMEM, NONE); } +void pmovzxwd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N8|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x33, isXMM_XMMorMEM, NONE); } +void pmovzxwq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_N4|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x34, isXMM_XMMorMEM, NONE); } +void pmuldq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x28, isXMM_XMMorMEM); } +void pmulhrsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x0B, T_0F38, T_66); } +void pmulhuw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE4); } +void pmulhw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE5); } +void pmulld(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66 | T_0F38, 0x40, isXMM_XMMorMEM); } +void pmullw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD5); } +void pmuludq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF4); } +void popcnt(const Reg®, const Operand& op) { opCnt(reg, op, 0xB8); } +void popf() { db(0x9D); } +void por(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEB); } +void prefetchit0(const Address& addr) { opMR(addr, Reg32(7), T_0F, 0x18); } +void prefetchit1(const Address& addr) { opMR(addr, Reg32(6), T_0F, 0x18); } +void prefetchnta(const Address& addr) { opMR(addr, Reg32(0), T_0F, 0x18); } +void prefetcht0(const Address& addr) { opMR(addr, Reg32(1), T_0F, 0x18); } +void prefetcht1(const Address& addr) { opMR(addr, Reg32(2), T_0F, 0x18); } +void prefetcht2(const Address& addr) { opMR(addr, Reg32(3), T_0F, 0x18); } +void prefetchw(const Address& addr) { opMR(addr, Reg32(1), T_0F, 0x0D); } +void prefetchwt1(const Address& addr) { opMR(addr, Reg32(2), T_0F, 0x0D); } +void psadbw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF6); } +void pshufb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x00, T_0F38, T_66); } +void pshufd(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, T_0F, T_66, imm8); } +void pshufhw(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, T_0F, T_F3, imm8); } +void pshuflw(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, T_0F, T_F2, imm8); } +void pshufw(const Mmx& mmx, const Operand& op, uint8_t imm8) { opMMX(mmx, op, 0x70, T_0F, T_NONE, imm8); } +void psignb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x08, T_0F38, T_66); } +void psignd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x0A, T_0F38, T_66); } +void psignw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x09, T_0F38, T_66); } +void pslld(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF2); } +void pslld(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x72, 6); } +void pslldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x73, 7); } +void psllq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF3); } +void psllq(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x73, 6); } +void psllw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF1); } +void psllw(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x71, 6); } +void psrad(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE2); } +void psrad(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x72, 4); } +void psraw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE1); } +void psraw(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x71, 4); } +void psrld(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD2); } +void psrld(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x72, 2); } +void psrldq(const Xmm& xmm, int imm8) { opMMX_IMM(xmm, imm8, 0x73, 3); } +void psrlq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD3); } +void psrlq(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x73, 2); } +void psrlw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD1); } +void psrlw(const Mmx& mmx, int imm8) { opMMX_IMM(mmx, imm8, 0x71, 2); } +void psubb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF8); } +void psubd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFA); } +void psubq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xFB); } +void psubsb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE8); } +void psubsw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xE9); } +void psubusb(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD8); } +void psubusw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xD9); } +void psubw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xF9); } +void ptest(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F38|T_YMM, 0x17, isXMM_XMMorMEM, NONE); } +void punpckhbw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x68); } +void punpckhdq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x6A); } +void punpckhqdq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F, 0x6D, isXMM_XMMorMEM); } +void punpckhwd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x69); } +void punpcklbw(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x60); } +void punpckldq(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x62); } +void punpcklqdq(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F, 0x6C, isXMM_XMMorMEM); } +void punpcklwd(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0x61); } +void pushf() { db(0x9C); } +void pxor(const Mmx& mmx, const Operand& op) { opMMX(mmx, op, 0xEF); } +void rcl(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 2); } +void rcl(const Operand& op, int imm) { opShift(op, imm, 2); } +void rcl(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, 2, &d); } +void rcl(const Reg& d, const Operand& op, int imm) { opShift(op, imm, 2, &d); } +void rcpps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x53, isXMM_XMMorMEM); } +void rcpss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F3, 0x53, isXMM_XMMorMEM); } +void rcr(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 3); } +void rcr(const Operand& op, int imm) { opShift(op, imm, 3); } +void rcr(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, 3, &d); } +void rcr(const Reg& d, const Operand& op, int imm) { opShift(op, imm, 3, &d); } +void rdmsr() { db(0x0F); db(0x32); } +void rdpmc() { db(0x0F); db(0x33); } +void rdrand(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opRR(Reg(6, Operand::REG, r.getBit()), r, T_0F, 0xC7); } +void rdseed(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opRR(Reg(7, Operand::REG, r.getBit()), r, T_0F, 0xC7); } +void rdtsc() { db(0x0F); db(0x31); } +void rdtscp() { db(0x0F); db(0x01); db(0xF9); } +void rep() { db(0xF3); } +void repe() { db(0xF3); } +void repne() { db(0xF2); } +void repnz() { db(0xF2); } +void repz() { db(0xF3); } +void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } } +void retf(int imm = 0) { if (imm) { db(0xCA); dw(imm); } else { db(0xCB); } } +void rol(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 8); } +void rol(const Operand& op, int imm) { opShift(op, imm, 8); } +void rol(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, 8, &d); } +void rol(const Reg& d, const Operand& op, int imm) { opShift(op, imm, 8, &d); } +void ror(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 9); } +void ror(const Operand& op, int imm) { opShift(op, imm, 9); } +void ror(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, 9, &d); } +void ror(const Reg& d, const Operand& op, int imm) { opShift(op, imm, 9, &d); } +void rorx(const Reg32e& r, const Operand& op, uint8_t imm) { opRRO(r, Reg32e(0, r.getBit()), op, T_0F3A|T_F2|T_APX, 0xF0, imm); } +void roundpd(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66|T_0F3A|T_YMM, 0x09, isXMM_XMMorMEM, imm); } +void roundps(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66|T_0F3A|T_YMM, 0x08, isXMM_XMMorMEM, imm); } +void roundsd(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x0B, isXMM_XMMorMEM, static_cast(imm)); } +void roundss(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | T_0F3A, 0x0A, isXMM_XMMorMEM, static_cast(imm)); } +void rsqrtps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x52, isXMM_XMMorMEM); } +void rsqrtss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F3, 0x52, isXMM_XMMorMEM); } +void sahf() { db(0x9E); } +void sal(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 12); } +void sal(const Operand& op, int imm) { opShift(op, imm, 12); } +void sal(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, 12, &d); } +void sal(const Reg& d, const Operand& op, int imm) { opShift(op, imm, 12, &d); } +void sar(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 15); } +void sar(const Operand& op, int imm) { opShift(op, imm, 15); } +void sar(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, 15, &d); } +void sar(const Reg& d, const Operand& op, int imm) { opShift(op, imm, 15, &d); } +void sarx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opRRO(r1, r2, op, T_APX|T_F3|T_0F38, 0xf7); } +void sbb(const Operand& op, uint32_t imm) { opOI(op, imm, 0x18, 3); } +void sbb(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x18); } +void sbb(const Reg& d, const Operand& op, uint32_t imm) { opROI(d, op, imm, T_NONE, 3); } +void sbb(const Reg& d, const Operand& op1, const Operand& op2) { opROO(d, op1, op2, T_NONE, 0x18); } +void scasb() { db(0xAE); } +void scasd() { db(0xAF); } +void scasw() { db(0x66); db(0xAF); } +void serialize() { db(0x0F); db(0x01); db(0xE8); } +void seta(const Operand& op) { opSetCC(op, 7); }//-V524 +void setae(const Operand& op) { opSetCC(op, 3); }//-V524 +void setb(const Operand& op) { opSetCC(op, 2); }//-V524 +void setbe(const Operand& op) { opSetCC(op, 6); }//-V524 +void setc(const Operand& op) { opSetCC(op, 2); }//-V524 +void sete(const Operand& op) { opSetCC(op, 4); }//-V524 +void setg(const Operand& op) { opSetCC(op, 15); }//-V524 +void setge(const Operand& op) { opSetCC(op, 13); }//-V524 +void setl(const Operand& op) { opSetCC(op, 12); }//-V524 +void setle(const Operand& op) { opSetCC(op, 14); }//-V524 +void setna(const Operand& op) { opSetCC(op, 6); }//-V524 +void setnae(const Operand& op) { opSetCC(op, 2); }//-V524 +void setnb(const Operand& op) { opSetCC(op, 3); }//-V524 +void setnbe(const Operand& op) { opSetCC(op, 7); }//-V524 +void setnc(const Operand& op) { opSetCC(op, 3); }//-V524 +void setne(const Operand& op) { opSetCC(op, 5); }//-V524 +void setng(const Operand& op) { opSetCC(op, 14); }//-V524 +void setnge(const Operand& op) { opSetCC(op, 12); }//-V524 +void setnl(const Operand& op) { opSetCC(op, 13); }//-V524 +void setnle(const Operand& op) { opSetCC(op, 15); }//-V524 +void setno(const Operand& op) { opSetCC(op, 1); }//-V524 +void setnp(const Operand& op) { opSetCC(op, 11); }//-V524 +void setns(const Operand& op) { opSetCC(op, 9); }//-V524 +void setnz(const Operand& op) { opSetCC(op, 5); }//-V524 +void seto(const Operand& op) { opSetCC(op, 0); }//-V524 +void setp(const Operand& op) { opSetCC(op, 10); }//-V524 +void setpe(const Operand& op) { opSetCC(op, 10); }//-V524 +void setpo(const Operand& op) { opSetCC(op, 11); }//-V524 +void sets(const Operand& op) { opSetCC(op, 8); }//-V524 +void setz(const Operand& op) { opSetCC(op, 4); }//-V524 +void sfence() { db(0x0F); db(0xAE); db(0xF8); } +void sha1msg1(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xC9, T_MUST_EVEX, 0xD9); } +void sha1msg2(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCA, T_MUST_EVEX, 0xDA); } +void sha1nexte(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xC8, T_MUST_EVEX, 0xD8); } +void sha1rnds4(const Xmm& x, const Operand& op, uint8_t imm) { opSSE_APX(x, op, T_0F3A, 0xCC, T_MUST_EVEX, 0xD4, imm); } +void sha256msg1(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCC, T_MUST_EVEX, 0xDC); } +void sha256msg2(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCD, T_MUST_EVEX, 0xDD); } +void sha256rnds2(const Xmm& x, const Operand& op) { opSSE_APX(x, op, T_0F38, 0xCB, T_MUST_EVEX, 0xDB); } +void shl(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 12); } +void shl(const Operand& op, int imm) { opShift(op, imm, 12); } +void shl(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, 12, &d); } +void shl(const Reg& d, const Operand& op, int imm) { opShift(op, imm, 12, &d); } +void shld(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(Reg(), op, reg, 0, 0xA4, 0x24, &_cl); } +void shld(const Operand& op, const Reg& reg, uint8_t imm) { opShxd(Reg(), op, reg, imm, 0xA4, 0x24); } +void shld(const Reg& d, const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(d, op, reg, 0, 0xA4, 0x24, &_cl); } +void shld(const Reg& d, const Operand& op, const Reg& reg, uint8_t imm) { opShxd(d, op, reg, imm, 0xA4, 0x24); } +void shlx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opRRO(r1, r2, op, T_APX|T_66|T_0F38, 0xf7); } +void shr(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 13); } +void shr(const Operand& op, int imm) { opShift(op, imm, 13); } +void shr(const Reg& d, const Operand& op, const Reg8& _cl) { opShift(op, _cl, 13, &d); } +void shr(const Reg& d, const Operand& op, int imm) { opShift(op, imm, 13, &d); } +void shrd(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(Reg(), op, reg, 0, 0xAC, 0x2C, &_cl); } +void shrd(const Operand& op, const Reg& reg, uint8_t imm) { opShxd(Reg(), op, reg, imm, 0xAC, 0x2C); } +void shrd(const Reg& d, const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(d, op, reg, 0, 0xAC, 0x2C, &_cl); } +void shrd(const Reg& d, const Operand& op, const Reg& reg, uint8_t imm) { opShxd(d, op, reg, imm, 0xAC, 0x2C); } +void shrx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opRRO(r1, r2, op, T_APX|T_F2|T_0F38, 0xf7); } +void shufpd(const Xmm& xmm, const Operand& op, uint8_t imm8) { opSSE(xmm, op, T_0F | T_66, 0xC6, isXMM_XMMorMEM, imm8); } +void shufps(const Xmm& xmm, const Operand& op, uint8_t imm8) { opSSE(xmm, op, T_0F, 0xC6, isXMM_XMMorMEM, imm8); } +void sqrtpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x51, isXMM_XMMorMEM); } +void sqrtps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x51, isXMM_XMMorMEM); } +void sqrtsd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F2, 0x51, isXMM_XMMorMEM); } +void sqrtss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F3, 0x51, isXMM_XMMorMEM); } +void stac() { db(0x0F); db(0x01); db(0xCB); } +void stc() { db(0xF9); } +void std() { db(0xFD); } +void sti() { db(0xFB); } +void stmxcsr(const Address& addr) { opMR(addr, Reg32(3), T_0F, 0xAE); } +void stosb() { db(0xAA); } +void stosd() { db(0xAB); } +void stosw() { db(0x66); db(0xAB); } +void sub(const Operand& op, uint32_t imm) { opOI(op, imm, 0x28, 5); } +void sub(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x28); } +void sub(const Reg& d, const Operand& op, uint32_t imm) { opROI(d, op, imm, T_NF|T_CODE1_IF1, 5); } +void sub(const Reg& d, const Operand& op1, const Operand& op2) { opROO(d, op1, op2, T_NF|T_CODE1_IF1, 0x28); } +void subpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x5C, isXMM_XMMorMEM); } +void subps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x5C, isXMM_XMMorMEM); } +void subsd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F2, 0x5C, isXMM_XMMorMEM); } +void subss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_F3, 0x5C, isXMM_XMMorMEM); } +void sysenter() { db(0x0F); db(0x34); } +void sysexit() { db(0x0F); db(0x35); } +void tpause(const Reg32& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) db(0x66); db(0x0F); db(0xAE); setModRM(3, 6, idx); } +void tzcnt(const Reg®, const Operand& op) { if (opROO(Reg(), op, reg, T_APX|T_NF, 0xF4)) return; opCnt(reg, op, 0xBC); } +void ucomisd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_66|T_0F, 0x2E, isXMM_XMMorMEM); } +void ucomiss(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x2E, isXMM_XMMorMEM); } +void ud2() { db(0x0F); db(0x0B); } +void umonitor(const Reg& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) int bit = r.getBit(); if (BIT != bit) { if ((BIT == 32 && bit == 16) || (BIT == 64 && bit == 32)) { db(0x67); } else { XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) } } db(0xF3); db(0x0F); db(0xAE); setModRM(3, 6, idx); } +void umwait(const Reg32& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) db(0xF2); db(0x0F); db(0xAE); setModRM(3, 6, idx); } +void unpckhpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x15, isXMM_XMMorMEM); } +void unpckhps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x15, isXMM_XMMorMEM); } +void unpcklpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x14, isXMM_XMMorMEM); } +void unpcklps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x14, isXMM_XMMorMEM); } +void vaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x58); } +void vaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_W0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x58); } +void vaddsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x58); } +void vaddss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_W0 | T_EVEX | T_ER_X | T_N4, 0x58); } +void vaddsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66|T_0F|T_YMM, 0xD0); } +void vaddsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2|T_0F|T_YMM, 0xD0); } +void vaesdec(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66|T_0F38|T_YMM|T_EVEX, 0xDE); } +void vaesdeclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66|T_0F38|T_YMM|T_EVEX, 0xDF); } +void vaesenc(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66|T_0F38|T_YMM|T_EVEX, 0xDC); } +void vaesenclast(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66|T_0F38|T_YMM|T_EVEX, 0xDD); } +void vaesimc(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F38|T_W0, 0xDB); } +void vaeskeygenassist(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A, 0xDF, imm); } +void vandnpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x55); } +void vandnps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_W0 | T_YMM | T_EVEX | T_B32, 0x55); } +void vandpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x54); } +void vandps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_W0 | T_YMM | T_EVEX | T_B32, 0x54); } +void vbcstnebf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F3|T_0F38|T_W0|T_YMM|T_B16, 0xB1); } +void vbcstnesh2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_66|T_0F38|T_W0|T_YMM|T_B16, 0xB1); } +void vblendpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM, 0x0D, imm); } +void vblendps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM, 0x0C, imm); } +void vblendvpd(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4B, x4.getIdx() << 4); } +void vblendvps(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4A, x4.getIdx() << 4); } +void vbroadcastf128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x1A); } +void vbroadcasti128(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x5A); } +void vbroadcastsd(const Ymm& y, const Operand& op) { if (!op.isMEM() && !(y.isYMM() && op.isXMM()) && !(y.isZMM() && op.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(y, op, T_0F38 | T_66 | T_W0 | T_YMM | T_EVEX | T_EW1 | T_N8, 0x19); } +void vbroadcastss(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N4|T_66|T_0F38|T_W0|T_YMM|T_EVEX, 0x18); } +void vcmpeq_ospd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 16); } +void vcmpeq_osps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 16); } +void vcmpeq_ossd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 16); } +void vcmpeq_osss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 16); } +void vcmpeq_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 8); } +void vcmpeq_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 8); } +void vcmpeq_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 8); } +void vcmpeq_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 8); } +void vcmpeq_uspd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 24); } +void vcmpeq_usps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 24); } +void vcmpeq_ussd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 24); } +void vcmpeq_usss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 24); } +void vcmpeqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 0); } +void vcmpeqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 0); } +void vcmpeqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 0); } +void vcmpeqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 0); } +void vcmpfalse_ospd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 27); } +void vcmpfalse_osps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 27); } +void vcmpfalse_ossd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 27); } +void vcmpfalse_osss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 27); } +void vcmpfalsepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 11); } +void vcmpfalseps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 11); } +void vcmpfalsesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 11); } +void vcmpfalsess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 11); } +void vcmpge_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 29); } +void vcmpge_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 29); } +void vcmpge_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 29); } +void vcmpge_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 29); } +void vcmpgepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 13); } +void vcmpgeps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 13); } +void vcmpgesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 13); } +void vcmpgess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 13); } +void vcmpgt_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 30); } +void vcmpgt_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 30); } +void vcmpgt_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 30); } +void vcmpgt_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 30); } +void vcmpgtpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 14); } +void vcmpgtps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 14); } +void vcmpgtsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 14); } +void vcmpgtss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 14); } +void vcmple_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 18); } +void vcmple_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 18); } +void vcmple_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 18); } +void vcmple_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 18); } +void vcmplepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 2); } +void vcmpleps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 2); } +void vcmplesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 2); } +void vcmpless(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 2); } +void vcmplt_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 17); } +void vcmplt_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 17); } +void vcmplt_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 17); } +void vcmplt_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 17); } +void vcmpltpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 1); } +void vcmpltps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 1); } +void vcmpltsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 1); } +void vcmpltss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 1); } +void vcmpneq_oqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 12); } +void vcmpneq_oqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 12); } +void vcmpneq_oqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 12); } +void vcmpneq_oqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 12); } +void vcmpneq_ospd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 28); } +void vcmpneq_osps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 28); } +void vcmpneq_ossd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 28); } +void vcmpneq_osss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 28); } +void vcmpneq_uspd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 20); } +void vcmpneq_usps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 20); } +void vcmpneq_ussd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 20); } +void vcmpneq_usss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 20); } +void vcmpneqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 4); } +void vcmpneqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 4); } +void vcmpneqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 4); } +void vcmpneqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 4); } +void vcmpnge_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 25); } +void vcmpnge_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 25); } +void vcmpnge_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 25); } +void vcmpnge_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 25); } +void vcmpngepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 9); } +void vcmpngeps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 9); } +void vcmpngesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 9); } +void vcmpngess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 9); } +void vcmpngt_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 26); } +void vcmpngt_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 26); } +void vcmpngt_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 26); } +void vcmpngt_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 26); } +void vcmpngtpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 10); } +void vcmpngtps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 10); } +void vcmpngtsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 10); } +void vcmpngtss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 10); } +void vcmpnle_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 22); } +void vcmpnle_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 22); } +void vcmpnle_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 22); } +void vcmpnle_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 22); } +void vcmpnlepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 6); } +void vcmpnleps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 6); } +void vcmpnlesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 6); } +void vcmpnless(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 6); } +void vcmpnlt_uqpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 21); } +void vcmpnlt_uqps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 21); } +void vcmpnlt_uqsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 21); } +void vcmpnlt_uqss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 21); } +void vcmpnltpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 5); } +void vcmpnltps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 5); } +void vcmpnltsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 5); } +void vcmpnltss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 5); } +void vcmpord_spd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 23); } +void vcmpord_sps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 23); } +void vcmpord_ssd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 23); } +void vcmpord_sss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 23); } +void vcmpordpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 7); } +void vcmpordps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 7); } +void vcmpordsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 7); } +void vcmpordss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 7); } +void vcmppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0xC2, imm); } +void vcmpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_0F|T_YMM, 0xC2, imm); } +void vcmpsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F, 0xC2, imm); } +void vcmpss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F, 0xC2, imm); } +void vcmptrue_uspd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 31); } +void vcmptrue_usps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 31); } +void vcmptrue_ussd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 31); } +void vcmptrue_usss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 31); } +void vcmptruepd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 15); } +void vcmptrueps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 15); } +void vcmptruesd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 15); } +void vcmptruess(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 15); } +void vcmpunord_spd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 19); } +void vcmpunord_sps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 19); } +void vcmpunord_ssd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 19); } +void vcmpunord_sss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 19); } +void vcmpunordpd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmppd(x1, x2, op, 3); } +void vcmpunordps(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpps(x1, x2, op, 3); } +void vcmpunordsd(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpsd(x1, x2, op, 3); } +void vcmpunordss(const Xmm& x1, const Xmm& x2, const Operand& op) { vcmpss(x1, x2, op, 3); } +void vcomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8|T_66|T_0F|T_EW1|T_EVEX|T_SAE_X, 0x2F); } +void vcomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4|T_0F|T_W0|T_EVEX|T_SAE_X, 0x2F); } +void vcvtdq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_F3 | T_YMM | T_EVEX | T_W0 | T_B32 | T_N8 | T_N_VL, 0xE6); } +void vcvtdq2ps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F|T_W0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0x5B); } +void vcvtneebf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F3|T_0F38|T_W0|T_YMM, 0xB0); } +void vcvtneeph2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_66|T_0F38|T_W0|T_YMM, 0xB0); } +void vcvtneobf162ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F2|T_0F38|T_W0|T_YMM, 0xB0); } +void vcvtneoph2ps(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_0F38|T_W0|T_YMM, 0xB0); } +void vcvtneps2bf16(const Xmm& x, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opCvt2(x, op, T_F3|T_0F38|T_W0|T_YMM|T_SAE_Z|T_B32|orEvexIf(encoding, 0, T_MUST_EVEX, 0), 0x72); } +void vcvtpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_F2 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0xE6); } +void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_YMM | T_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5A); } +void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_N8 | T_N_VL | T_SAE_Y, 0x13); } +void vcvtps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_W0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0x5B); } +void vcvtps2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_YMM | T_EVEX | T_W0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x5A); } +void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N8 | T_N_VL | T_SAE_Y | T_M_K, 0x1D, imm); } +void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_N4 | T_ER_X, 0x2D); } +void vcvtsd2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_F2|T_0F|T_EW1|T_EVEX|T_ER_X, 0x5A); } +void vcvtsi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F2 | T_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_N4, 0x2A); } +void vcvtsi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F3 | T_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_N4, 0x2A); } +void vcvtss2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_F3|T_0F|T_W0|T_EVEX|T_SAE_X, 0x5A); } +void vcvtss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0 | T_EVEX | T_ER_X | T_N8, 0x2D); } +void vcvttpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_66 | T_0F | T_YMM | T_EVEX |T_EW1 | T_B64 | T_SAE_Z, 0xE6); } +void vcvttps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3|T_0F|T_W0|T_YMM|T_EVEX|T_SAE_Z|T_B32, 0x5B); } +void vcvttsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_N4 | T_SAE_X, 0x2C); } +void vcvttss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0 | T_EVEX | T_SAE_X | T_N8, 0x2C); } +void vdivpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5E); } +void vdivps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_W0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5E); } +void vdivsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x5E); } +void vdivss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_W0 | T_EVEX | T_ER_X | T_N4, 0x5E); } +void vdppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0, 0x41, imm); } +void vdpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM, 0x40, imm); } +void vextractf128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); } +void vextracti128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x39, imm); } +void vextractps(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x17, imm); } +void vfmadd132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0x98); } +void vfmadd132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0x98); } +void vfmadd132sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0x99); } +void vfmadd132ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EVEX|T_ER_X, 0x99); } +void vfmadd213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xA8); } +void vfmadd213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xA8); } +void vfmadd213sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xA9); } +void vfmadd213ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EVEX|T_ER_X, 0xA9); } +void vfmadd231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xB8); } +void vfmadd231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xB8); } +void vfmadd231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xB9); } +void vfmadd231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EVEX|T_ER_X, 0xB9); } +void vfmaddsub132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0x96); } +void vfmaddsub132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0x96); } +void vfmaddsub213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xA6); } +void vfmaddsub213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xA6); } +void vfmaddsub231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xB6); } +void vfmaddsub231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xB6); } +void vfmsub132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0x9A); } +void vfmsub132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0x9A); } +void vfmsub132sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0x9B); } +void vfmsub132ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EVEX|T_ER_X, 0x9B); } +void vfmsub213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xAA); } +void vfmsub213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xAA); } +void vfmsub213sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xAB); } +void vfmsub213ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EVEX|T_ER_X, 0xAB); } +void vfmsub231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xBA); } +void vfmsub231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xBA); } +void vfmsub231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xBB); } +void vfmsub231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EVEX|T_ER_X, 0xBB); } +void vfmsubadd132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0x97); } +void vfmsubadd132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0x97); } +void vfmsubadd213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xA7); } +void vfmsubadd213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xA7); } +void vfmsubadd231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xB7); } +void vfmsubadd231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xB7); } +void vfnmadd132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0x9C); } +void vfnmadd132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0x9C); } +void vfnmadd132sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0x9D); } +void vfnmadd132ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EVEX|T_ER_X, 0x9D); } +void vfnmadd213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xAC); } +void vfnmadd213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xAC); } +void vfnmadd213sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xAD); } +void vfnmadd213ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EVEX|T_ER_X, 0xAD); } +void vfnmadd231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xBC); } +void vfnmadd231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xBC); } +void vfnmadd231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xBD); } +void vfnmadd231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EVEX|T_ER_X, 0xBD); } +void vfnmsub132pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0x9E); } +void vfnmsub132ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0x9E); } +void vfnmsub132sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0x9F); } +void vfnmsub132ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EVEX|T_ER_X, 0x9F); } +void vfnmsub213pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xAE); } +void vfnmsub213ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xAE); } +void vfnmsub213sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xAF); } +void vfnmsub213ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EVEX|T_ER_X, 0xAF); } +void vfnmsub231pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0xBE); } +void vfnmsub231ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0xBE); } +void vfnmsub231sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_W1|T_EW1|T_EVEX|T_ER_X, 0xBF); } +void vfnmsub231ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_EVEX|T_ER_X, 0xBF); } +void vgatherdpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x92, 0); } +void vgatherdps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x92, 1); } +void vgatherqpd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x93, 1); } +void vgatherqps(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x93, 2); } +void vgf2p8affineinvqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W1|T_EW1|T_YMM|T_EVEX|T_SAE_Z|T_B64, 0xCF, imm); } +void vgf2p8affineqb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W1|T_EW1|T_YMM|T_EVEX|T_SAE_Z|T_B64, 0xCE, imm); } +void vgf2p8mulb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_SAE_Z, 0xCF); } +void vhaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66|T_0F|T_YMM, 0x7C); } +void vhaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2|T_0F|T_YMM, 0x7C); } +void vhsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66|T_0F|T_YMM, 0x7D); } +void vhsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2|T_0F|T_YMM, 0x7D); } +void vinsertf128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x18, imm); } +void vinserti128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isXMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x38, imm); } +void vinsertps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F3A|T_W0|T_EVEX, 0x21, imm); } +void vlddqu(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, cvtIdx0(x), addr, T_0F | T_F2 | T_W0 | T_YMM, 0xF0); } +void vldmxcsr(const Address& addr) { opAVX_X_X_XM(xm2, xm0, addr, T_0F, 0xAE); } +void vmaskmovdqu(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_66, 0xF7); } +void vmaskmovpd(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x2F); } +void vmaskmovpd(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x2D); } +void vmaskmovps(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x2E); } +void vmaskmovps(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x2C); } +void vmaxpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0x5F); } +void vmaxps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_W0 | T_YMM | T_EVEX | T_SAE_Z | T_B32, 0x5F); } +void vmaxsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_SAE_X | T_N8, 0x5F); } +void vmaxss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_W0 | T_EVEX | T_SAE_X | T_N4, 0x5F); } +void vminpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_SAE_Z | T_B64, 0x5D); } +void vminps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_W0 | T_YMM | T_EVEX | T_SAE_Z | T_B32, 0x5D); } +void vminsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_SAE_X | T_N8, 0x5D); } +void vminss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_W0 | T_EVEX | T_SAE_X | T_N4, 0x5D); } +void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_M_K, 0x29); } +void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX, 0x28); } +void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F|T_W0|T_YMM|T_EVEX|T_M_K, 0x29); } +void vmovaps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F|T_W0|T_YMM|T_EVEX, 0x28); } +void vmovddup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_DUP|T_F2|T_0F|T_EW1|T_YMM|T_EVEX, 0x12); } +void vmovdqa(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66|T_0F|T_YMM, 0x7F); } +void vmovdqa(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_YMM, 0x6F); } +void vmovdqu(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_F3|T_0F|T_YMM, 0x7F); } +void vmovdqu(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3|T_0F|T_YMM, 0x6F); } +void vmovhlps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_W0, 0x12); } +void vmovhpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N8|T_66|T_0F|T_EW1|T_EVEX, 0x17); } +void vmovhpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_N8|T_66|T_0F|T_EW1|T_EVEX, 0x16); } +void vmovhps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N8|T_0F|T_W0|T_EVEX, 0x17); } +void vmovhps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_N8|T_0F|T_W0|T_EVEX, 0x16); } +void vmovlhps(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_0F | T_EVEX | T_W0, 0x16); } +void vmovlpd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N8|T_66|T_0F|T_EW1|T_EVEX, 0x13); } +void vmovlpd(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_N8|T_66|T_0F|T_EW1|T_EVEX, 0x12); } +void vmovlps(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N8|T_0F|T_W0|T_EVEX, 0x13); } +void vmovlps(const Xmm& x, const Operand& op1, const Operand& op2 = Operand()) { if (!op2.isNone() && !op2.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, op1, op2, T_N8|T_0F|T_W0|T_EVEX, 0x12); } +void vmovmskpd(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_66 | T_W0 | T_YMM, 0x50); } +void vmovmskps(const Reg& r, const Xmm& x) { if (!r.isBit(i32e)) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x.isXMM() ? Xmm(r.getIdx()) : Ymm(r.getIdx()), cvtIdx0(x), x, T_0F | T_W0 | T_YMM, 0x50); } +void vmovntdq(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_W0, 0xE7); } +void vmovntdqa(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_0F38 | T_66 | T_YMM | T_EVEX | T_W0, 0x2A); } +void vmovntpd(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_66 | T_YMM | T_EVEX | T_EW1, 0x2B); } +void vmovntps(const Address& addr, const Xmm& x) { opVex(x, 0, addr, T_0F | T_YMM | T_EVEX | T_W0, 0x2B); } +void vmovq(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_0F | T_66 | T_EVEX | T_EW1 | T_N8, x.getIdx() < 16 ? 0xD6 : 0x7E); } +void vmovq(const Xmm& x, const Address& addr) { uint64_t type; uint8_t code; if (x.getIdx() < 16) { type = T_0F | T_F3; code = 0x7E; } else { type = T_0F | T_66 | T_EVEX | T_EW1 | T_N8; code = 0x6E; } opAVX_X_X_XM(x, xm0, addr, type, code); } +void vmovq(const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x1, xm0, x2, T_0F | T_F3 | T_EVEX | T_EW1 | T_N8, 0x7E); } +void vmovsd(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N8|T_F2|T_0F|T_EW1|T_EVEX | T_M_K, 0x11); } +void vmovsd(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N8|T_F2|T_0F|T_EW1|T_EVEX, 0x10); } +void vmovsd(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_N8|T_F2|T_0F|T_EW1|T_EVEX, 0x10); } +void vmovshdup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3|T_0F|T_W0|T_YMM|T_EVEX, 0x16); } +void vmovsldup(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_F3|T_0F|T_W0|T_YMM|T_EVEX, 0x12); } +void vmovss(const Address& addr, const Xmm& x) { opAVX_X_X_XM(x, xm0, addr, T_N4|T_F3|T_0F|T_W0|T_EVEX | T_M_K, 0x11); } +void vmovss(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N4|T_F3|T_0F|T_W0|T_EVEX, 0x10); } +void vmovss(const Xmm& x1, const Xmm& x2, const Operand& op = Operand()) { if (!op.isNone() && !op.isXMM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x1, x2, op, T_N4|T_F3|T_0F|T_W0|T_EVEX, 0x10); } +void vmovupd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_M_K, 0x11); } +void vmovupd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX, 0x10); } +void vmovups(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F|T_W0|T_YMM|T_EVEX|T_M_K, 0x11); } +void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F|T_W0|T_YMM|T_EVEX, 0x10); } +void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x59); } +void vmulps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_W0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x59); } +void vmulsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x59); } +void vmulss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_W0 | T_EVEX | T_ER_X | T_N4, 0x59); } +void vorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x56); } +void vorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_W0 | T_YMM | T_EVEX | T_B32, 0x56); } +void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F38|T_YMM|T_EVEX, 0x1C); } +void vpabsd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_B32, 0x1E); } +void vpabsw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F38|T_YMM|T_EVEX, 0x1D); } +void vpackssdw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_W0|T_YMM|T_EVEX|T_B32, 0x6B); } +void vpacksswb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0x63); } +void vpackusdw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_B32, 0x2B); } +void vpackuswb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0x67); } +void vpaddb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xFC); } +void vpaddd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_W0|T_YMM|T_EVEX|T_B32, 0xFE); } +void vpaddq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_B64, 0xD4); } +void vpaddsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xEC); } +void vpaddsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xED); } +void vpaddusb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xDC); } +void vpaddusw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xDD); } +void vpaddw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xFD); } +void vpalignr(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_YMM|T_EVEX, 0x0F, imm); } +void vpand(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0xDB); } +void vpandn(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0xDF); } +void vpavgb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xE0); } +void vpavgw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xE3); } +void vpblendd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM, 0x02, imm); } +void vpblendvb(const Xmm& x1, const Xmm& x2, const Operand& op, const Xmm& x4) { opAVX_X_X_XM(x1, x2, op, T_0F3A | T_66 | T_YMM, 0x4C, x4.getIdx() << 4); } +void vpblendw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM, 0x0E, imm); } +void vpbroadcastb(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N1|T_66|T_0F38|T_W0|T_YMM|T_EVEX, 0x78); } +void vpbroadcastd(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N4|T_66|T_0F38|T_W0|T_YMM|T_EVEX, 0x58); } +void vpbroadcastq(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N8|T_66|T_0F38|T_W0|T_EW1|T_YMM|T_EVEX, 0x59); } +void vpbroadcastw(const Xmm& x, const Operand& op) { if (!(op.isXMM() || op.isMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_XM_IMM(x, op, T_N2|T_66|T_0F38|T_W0|T_YMM|T_EVEX, 0x79); } +void vpclmulhqhqdq(const Xmm& x1, const Xmm& x2, const Operand& op) { vpclmulqdq(x1, x2, op, 0x11); } +void vpclmulhqlqdq(const Xmm& x1, const Xmm& x2, const Operand& op) { vpclmulqdq(x1, x2, op, 0x01); } +void vpclmullqhqdq(const Xmm& x1, const Xmm& x2, const Operand& op) { vpclmulqdq(x1, x2, op, 0x10); } +void vpclmullqlqdq(const Xmm& x1, const Xmm& x2, const Operand& op) { vpclmulqdq(x1, x2, op, 0x00); } +void vpclmulqdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM|T_EVEX, 0x44, imm); } +void vpcmpeqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0x74); } +void vpcmpeqd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0x76); } +void vpcmpeqq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM, 0x29); } +void vpcmpeqw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0x75); } +void vpcmpestri(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A, 0x61, imm); } +void vpcmpestrm(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A, 0x60, imm); } +void vpcmpgtb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0x64); } +void vpcmpgtd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0x66); } +void vpcmpgtq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM, 0x37); } +void vpcmpgtw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0x65); } +void vpcmpistri(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A, 0x63, imm); } +void vpcmpistrm(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A, 0x62, imm); } +void vpdpbusd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_SAE_Z|T_B32, 0x50, encoding); } +void vpdpbusds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_SAE_Z|T_B32, 0x51, encoding); } +void vpdpwssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_SAE_Z|T_B32, 0x52, encoding); } +void vpdpwssds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_SAE_Z|T_B32, 0x53, encoding); } +void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); } +void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); } +void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_B32, 0x36); } +void vpermilpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EW1|T_YMM|T_EVEX|T_B64, 0x0D); } +void vpermilpd(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A|T_EW1|T_YMM|T_EVEX|T_B64, 0x05, imm); } +void vpermilps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_B32, 0x0C); } +void vpermilps(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A|T_W0|T_YMM|T_EVEX|T_B32, 0x04, imm); } +void vpermpd(const Ymm& y, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(y, op, T_66|T_0F3A|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0x01, imm); } +void vpermpd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x16); } +void vpermps(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_B32, 0x16); } +void vpermq(const Ymm& y, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(y, op, T_66|T_0F3A|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0x00, imm); } +void vpermq(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66|T_0F38|T_EW1|T_YMM|T_EVEX|T_B64, 0x36); } +void vpextrb(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(8|16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x14, imm); } +void vpextrd(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(32) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x16, imm); } +void vpextrq(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(64) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x, 0, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x16, imm); } +void vpextrw(const Operand& op, const Xmm& x, uint8_t imm) { if (!((op.isREG(16|i32e) || op.isMEM()) && x.isXMM())) XBYAK_THROW(ERR_BAD_COMBINATION) if (op.isREG() && x.getIdx() < 16) { opAVX_X_X_XM(Xmm(op.getIdx()), xm0, x, T_0F | T_66, 0xC5, imm); } else { opVex(x, 0, op, T_0F3A | T_66 | T_EVEX | T_N2, 0x15, imm); } } +void vpgatherdd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x90, 1); } +void vpgatherdq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x90, 0); } +void vpgatherqd(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W0, 0x91, 2); } +void vpgatherqq(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W1, 0x91, 1); } +void vphaddd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM, 0x02); } +void vphaddsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM, 0x03); } +void vphaddw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM, 0x01); } +void vphminposuw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F38, 0x41); } +void vphsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM, 0x06); } +void vphsubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM, 0x07); } +void vphsubw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM, 0x05); } +void vpinsrb(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_EVEX | T_N1, 0x20, imm); } +void vpinsrd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_N4, 0x22, imm); } +void vpinsrq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(64) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F3A | T_66 | T_W1 | T_EVEX | T_EW1 | T_N8, 0x22, imm); } +void vpinsrw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { if (!(x1.isXMM() && x2.isXMM() && (op.isREG(32) || op.isMEM()))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x1, &x2, op, T_0F | T_66 | T_EVEX | T_N2, 0xC4, imm); } +void vpmaddubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM|T_EVEX, 0x04); } +void vpmaddwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xF5); } +void vpmaskmovd(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x8E); } +void vpmaskmovd(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x8C); } +void vpmaskmovq(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_XM(x2, x1, addr, T_0F38 | T_66 | T_W1 | T_YMM, 0x8E); } +void vpmaskmovq(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W1 | T_YMM, 0x8C); } +void vpmaxsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM|T_EVEX, 0x3C); } +void vpmaxsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_B32, 0x3D); } +void vpmaxsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xEE); } +void vpmaxub(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xDE); } +void vpmaxud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_B32, 0x3F); } +void vpmaxuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM|T_EVEX, 0x3E); } +void vpminsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM|T_EVEX, 0x38); } +void vpminsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_B32, 0x39); } +void vpminsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xEA); } +void vpminub(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xDA); } +void vpminud(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_B32, 0x3B); } +void vpminuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM|T_EVEX, 0x3A); } +void vpmovmskb(const Reg32e& r, const Xmm& x) { if (!x.is(Operand::XMM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(x.isYMM() ? Ymm(r.getIdx()) : Xmm(r.getIdx()), 0, x, T_0F | T_66 | T_YMM, 0xD7); } +void vpmovsxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x21); } +void vpmovsxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N2|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x22); } +void vpmovsxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x20); } +void vpmovsxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8|T_N_VL|T_66|T_0F38|T_W0|T_YMM|T_EVEX, 0x25); } +void vpmovsxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x23); } +void vpmovsxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x24); } +void vpmovzxbd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x31); } +void vpmovzxbq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N2|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x32); } +void vpmovzxbw(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x30); } +void vpmovzxdq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8|T_N_VL|T_66|T_0F38|T_W0|T_YMM|T_EVEX, 0x35); } +void vpmovzxwd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x33); } +void vpmovzxwq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4|T_N_VL|T_66|T_0F38|T_YMM|T_EVEX, 0x34); } +void vpmuldq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_EVEX|T_B64, 0x28); } +void vpmulhrsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM|T_EVEX, 0x0B); } +void vpmulhuw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xE4); } +void vpmulhw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xE5); } +void vpmulld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_B32, 0x40); } +void vpmullw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xD5); } +void vpmuludq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_B64, 0xF4); } +void vpor(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0xEB); } +void vpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xF6); } +void vpshufb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM|T_EVEX, 0x00); } +void vpshufd(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_W0|T_YMM|T_EVEX|T_B32, 0x70, imm); } +void vpshufhw(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_F3|T_0F|T_YMM|T_EVEX, 0x70, imm); } +void vpshuflw(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_F2|T_0F|T_YMM|T_EVEX, 0x70, imm); } +void vpsignb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM, 0x08); } +void vpsignd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM, 0x0A); } +void vpsignw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_YMM, 0x09); } +void vpslld(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66|T_0F|T_W0|T_YMM|T_EVEX|T_B32|T_MEM_EVEX, 0x72, imm); } +void vpslld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16|T_66|T_0F|T_W0|T_YMM|T_EVEX, 0xF2); } +void vpslldq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 7), x, op, T_66|T_0F|T_YMM|T_EVEX|T_MEM_EVEX, 0x73, imm); } +void vpsllq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_B64|T_MEM_EVEX, 0x73, imm); } +void vpsllq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16|T_66|T_0F|T_EW1|T_YMM|T_EVEX, 0xF3); } +void vpsllvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_B32, 0x47); } +void vpsllvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0x47); } +void vpsllw(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 6), x, op, T_66|T_0F|T_YMM|T_EVEX|T_MEM_EVEX, 0x71, imm); } +void vpsllw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16|T_66|T_0F|T_YMM|T_EVEX, 0xF1); } +void vpsrad(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66|T_0F|T_W0|T_YMM|T_EVEX|T_B32|T_MEM_EVEX, 0x72, imm); } +void vpsrad(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16|T_66|T_0F|T_W0|T_YMM|T_EVEX, 0xE2); } +void vpsravd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_B32, 0x46); } +void vpsraw(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66|T_0F|T_YMM|T_EVEX|T_MEM_EVEX, 0x71, imm); } +void vpsraw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16|T_66|T_0F|T_YMM|T_EVEX, 0xE1); } +void vpsrld(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66|T_0F|T_W0|T_YMM|T_EVEX|T_B32|T_MEM_EVEX, 0x72, imm); } +void vpsrld(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16|T_66|T_0F|T_W0|T_YMM|T_EVEX, 0xD2); } +void vpsrldq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 3), x, op, T_66|T_0F|T_YMM|T_EVEX|T_MEM_EVEX, 0x73, imm); } +void vpsrlq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_B64|T_MEM_EVEX, 0x73, imm); } +void vpsrlq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16|T_66|T_0F|T_EW1|T_YMM|T_EVEX, 0xD3); } +void vpsrlvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_EVEX|T_B32, 0x45); } +void vpsrlvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W1|T_EW1|T_YMM|T_EVEX|T_B64, 0x45); } +void vpsrlw(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 2), x, op, T_66|T_0F|T_YMM|T_EVEX|T_MEM_EVEX, 0x71, imm); } +void vpsrlw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16|T_66|T_0F|T_YMM|T_EVEX, 0xD1); } +void vpsubb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xF8); } +void vpsubd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_W0|T_YMM|T_EVEX|T_B32, 0xFA); } +void vpsubq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_B64, 0xFB); } +void vpsubsb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xE8); } +void vpsubsw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xE9); } +void vpsubusb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xD8); } +void vpsubusw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xD9); } +void vpsubw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0xF9); } +void vptest(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F38|T_YMM, 0x17); } +void vpunpckhbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0x68); } +void vpunpckhdq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_W0|T_YMM|T_EVEX|T_B32, 0x6A); } +void vpunpckhqdq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_B64, 0x6D); } +void vpunpckhwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0x69); } +void vpunpcklbw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0x60); } +void vpunpckldq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_W0|T_YMM|T_EVEX|T_B32, 0x62); } +void vpunpcklqdq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_B64, 0x6C); } +void vpunpcklwd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM|T_EVEX, 0x61); } +void vpxor(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_YMM, 0xEF); } +void vrcpps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F|T_YMM, 0x53); } +void vrcpss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F, 0x53); } +void vroundpd(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A|T_YMM, 0x09, imm); } +void vroundps(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66|T_0F3A|T_YMM, 0x08, imm); } +void vroundsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0, 0x0B, imm); } +void vroundss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0, 0x0A, imm); } +void vrsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F|T_YMM, 0x52); } +void vrsqrtss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F, 0x52); } +void vsha512msg1(const Ymm& y, const Xmm& x) { if (!(y.isYMM() && x.isXMM())) XBYAK_THROW(ERR_BAD_PARAMETER) opVex(y, 0, x, T_F2 | T_0F38 | T_W0 | T_YMM, 0xCC); } +void vsha512msg2(const Ymm& y1, const Ymm& y2) { if (!(y1.isYMM() && y2.isYMM())) XBYAK_THROW(ERR_BAD_PARAMETER) opVex(y1, 0, y2, T_F2 | T_0F38 | T_W0 | T_YMM, 0xCD); } +void vsha512rnds2(const Ymm& y1, const Ymm& y2, const Xmm& x) { if (!(y1.isYMM() && y2.isYMM() && x.isXMM())) XBYAK_THROW(ERR_BAD_PARAMETER) opVex(y1, &y2, x, T_F2 | T_0F38 | T_W0 | T_YMM, 0xCB); } +void vshufpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_B64, 0xC6, imm); } +void vshufps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_0F|T_W0|T_YMM|T_EVEX|T_B32, 0xC6, imm); } +void vsm3msg1(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38|T_W0|T_EVEX, 0xDA); } +void vsm3msg2(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_EVEX, 0xDA); } +void vsm3rnds2(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_EVEX, 0xDE, imm); } +void vsm4key4(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F38|T_W0|T_EVEX, 0xDA); } +void vsm4rnds4(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F38|T_W0|T_EVEX, 0xDA); } +void vsqrtpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_ER_Z|T_B64, 0x51); } +void vsqrtps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F|T_W0|T_YMM|T_EVEX|T_ER_Z|T_B32, 0x51); } +void vsqrtsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_F2|T_0F|T_EW1|T_EVEX|T_ER_X, 0x51); } +void vsqrtss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_F3|T_0F|T_W0|T_EVEX|T_ER_X, 0x51); } +void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, T_0F, 0xAE); } +void vsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5C); } +void vsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_W0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5C); } +void vsubsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x5C); } +void vsubss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_W0 | T_EVEX | T_ER_X | T_N4, 0x5C); } +void vtestpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F38|T_YMM, 0x0F); } +void vtestps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66|T_0F38|T_YMM, 0x0E); } +void vucomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8|T_66|T_0F|T_EW1|T_EVEX|T_SAE_X, 0x2E); } +void vucomiss(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N4|T_0F|T_W0|T_EVEX|T_SAE_X, 0x2E); } +void vunpckhpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_B64, 0x15); } +void vunpckhps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F|T_W0|T_YMM|T_EVEX|T_B32, 0x15); } +void vunpcklpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_EVEX|T_B64, 0x14); } +void vunpcklps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F|T_W0|T_YMM|T_EVEX|T_B32, 0x14); } +void vxorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_B64, 0x57); } +void vxorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_W0 | T_YMM | T_EVEX | T_B32, 0x57); } +void vzeroall() { db(0xC5); db(0xFC); db(0x77); } +void vzeroupper() { db(0xC5); db(0xF8); db(0x77); } +void wait() { db(0x9B); } +void wbinvd() { db(0x0F); db(0x09); } +void wrmsr() { db(0x0F); db(0x30); } +void xabort(uint8_t imm) { db(0xC6); db(0xF8); db(imm); } +void xadd(const Operand& op, const Reg& reg) { opRO(reg, op, T_0F, 0xC0 | (reg.isBit(8) ? 0 : 1), op.getBit() == reg.getBit()); } +void xbegin(uint32_t rel) { db(0xC7); db(0xF8); dd(rel); } +void xend() { db(0x0F); db(0x01); db(0xD5); } +void xgetbv() { db(0x0F); db(0x01); db(0xD0); } +void xlatb() { db(0xD7); } +void xor_(const Operand& op, uint32_t imm) { opOI(op, imm, 0x30, 6); } +void xor_(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x30); } +void xor_(const Reg& d, const Operand& op, uint32_t imm) { opROI(d, op, imm, T_NF|T_CODE1_IF1, 6); } +void xor_(const Reg& d, const Operand& op1, const Operand& op2) { opROO(d, op1, op2, T_NF|T_CODE1_IF1, 0x30); } +void xorpd(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F | T_66, 0x57, isXMM_XMMorMEM); } +void xorps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x57, isXMM_XMMorMEM); } +void xresldtrk() { db(0xF2); db(0x0F); db(0x01); db(0xE9); } +void xsusldtrk() { db(0xF2); db(0x0F); db(0x01); db(0xE8); } +#ifdef XBYAK_ENABLE_OMITTED_OPERAND +void vblendpd(const Xmm& x, const Operand& op, uint8_t imm) { vblendpd(x, x, op, imm); } +void vblendps(const Xmm& x, const Operand& op, uint8_t imm) { vblendps(x, x, op, imm); } +void vblendvpd(const Xmm& x1, const Operand& op, const Xmm& x4) { vblendvpd(x1, x1, op, x4); } +void vblendvps(const Xmm& x1, const Operand& op, const Xmm& x4) { vblendvps(x1, x1, op, x4); } +void vcmpeq_ospd(const Xmm& x, const Operand& op) { vcmpeq_ospd(x, x, op); } +void vcmpeq_osps(const Xmm& x, const Operand& op) { vcmpeq_osps(x, x, op); } +void vcmpeq_ossd(const Xmm& x, const Operand& op) { vcmpeq_ossd(x, x, op); } +void vcmpeq_osss(const Xmm& x, const Operand& op) { vcmpeq_osss(x, x, op); } +void vcmpeq_uqpd(const Xmm& x, const Operand& op) { vcmpeq_uqpd(x, x, op); } +void vcmpeq_uqps(const Xmm& x, const Operand& op) { vcmpeq_uqps(x, x, op); } +void vcmpeq_uqsd(const Xmm& x, const Operand& op) { vcmpeq_uqsd(x, x, op); } +void vcmpeq_uqss(const Xmm& x, const Operand& op) { vcmpeq_uqss(x, x, op); } +void vcmpeq_uspd(const Xmm& x, const Operand& op) { vcmpeq_uspd(x, x, op); } +void vcmpeq_usps(const Xmm& x, const Operand& op) { vcmpeq_usps(x, x, op); } +void vcmpeq_ussd(const Xmm& x, const Operand& op) { vcmpeq_ussd(x, x, op); } +void vcmpeq_usss(const Xmm& x, const Operand& op) { vcmpeq_usss(x, x, op); } +void vcmpeqpd(const Xmm& x, const Operand& op) { vcmpeqpd(x, x, op); } +void vcmpeqps(const Xmm& x, const Operand& op) { vcmpeqps(x, x, op); } +void vcmpeqsd(const Xmm& x, const Operand& op) { vcmpeqsd(x, x, op); } +void vcmpeqss(const Xmm& x, const Operand& op) { vcmpeqss(x, x, op); } +void vcmpfalse_ospd(const Xmm& x, const Operand& op) { vcmpfalse_ospd(x, x, op); } +void vcmpfalse_osps(const Xmm& x, const Operand& op) { vcmpfalse_osps(x, x, op); } +void vcmpfalse_ossd(const Xmm& x, const Operand& op) { vcmpfalse_ossd(x, x, op); } +void vcmpfalse_osss(const Xmm& x, const Operand& op) { vcmpfalse_osss(x, x, op); } +void vcmpfalsepd(const Xmm& x, const Operand& op) { vcmpfalsepd(x, x, op); } +void vcmpfalseps(const Xmm& x, const Operand& op) { vcmpfalseps(x, x, op); } +void vcmpfalsesd(const Xmm& x, const Operand& op) { vcmpfalsesd(x, x, op); } +void vcmpfalsess(const Xmm& x, const Operand& op) { vcmpfalsess(x, x, op); } +void vcmpge_oqpd(const Xmm& x, const Operand& op) { vcmpge_oqpd(x, x, op); } +void vcmpge_oqps(const Xmm& x, const Operand& op) { vcmpge_oqps(x, x, op); } +void vcmpge_oqsd(const Xmm& x, const Operand& op) { vcmpge_oqsd(x, x, op); } +void vcmpge_oqss(const Xmm& x, const Operand& op) { vcmpge_oqss(x, x, op); } +void vcmpgepd(const Xmm& x, const Operand& op) { vcmpgepd(x, x, op); } +void vcmpgeps(const Xmm& x, const Operand& op) { vcmpgeps(x, x, op); } +void vcmpgesd(const Xmm& x, const Operand& op) { vcmpgesd(x, x, op); } +void vcmpgess(const Xmm& x, const Operand& op) { vcmpgess(x, x, op); } +void vcmpgt_oqpd(const Xmm& x, const Operand& op) { vcmpgt_oqpd(x, x, op); } +void vcmpgt_oqps(const Xmm& x, const Operand& op) { vcmpgt_oqps(x, x, op); } +void vcmpgt_oqsd(const Xmm& x, const Operand& op) { vcmpgt_oqsd(x, x, op); } +void vcmpgt_oqss(const Xmm& x, const Operand& op) { vcmpgt_oqss(x, x, op); } +void vcmpgtpd(const Xmm& x, const Operand& op) { vcmpgtpd(x, x, op); } +void vcmpgtps(const Xmm& x, const Operand& op) { vcmpgtps(x, x, op); } +void vcmpgtsd(const Xmm& x, const Operand& op) { vcmpgtsd(x, x, op); } +void vcmpgtss(const Xmm& x, const Operand& op) { vcmpgtss(x, x, op); } +void vcmple_oqpd(const Xmm& x, const Operand& op) { vcmple_oqpd(x, x, op); } +void vcmple_oqps(const Xmm& x, const Operand& op) { vcmple_oqps(x, x, op); } +void vcmple_oqsd(const Xmm& x, const Operand& op) { vcmple_oqsd(x, x, op); } +void vcmple_oqss(const Xmm& x, const Operand& op) { vcmple_oqss(x, x, op); } +void vcmplepd(const Xmm& x, const Operand& op) { vcmplepd(x, x, op); } +void vcmpleps(const Xmm& x, const Operand& op) { vcmpleps(x, x, op); } +void vcmplesd(const Xmm& x, const Operand& op) { vcmplesd(x, x, op); } +void vcmpless(const Xmm& x, const Operand& op) { vcmpless(x, x, op); } +void vcmplt_oqpd(const Xmm& x, const Operand& op) { vcmplt_oqpd(x, x, op); } +void vcmplt_oqps(const Xmm& x, const Operand& op) { vcmplt_oqps(x, x, op); } +void vcmplt_oqsd(const Xmm& x, const Operand& op) { vcmplt_oqsd(x, x, op); } +void vcmplt_oqss(const Xmm& x, const Operand& op) { vcmplt_oqss(x, x, op); } +void vcmpltpd(const Xmm& x, const Operand& op) { vcmpltpd(x, x, op); } +void vcmpltps(const Xmm& x, const Operand& op) { vcmpltps(x, x, op); } +void vcmpltsd(const Xmm& x, const Operand& op) { vcmpltsd(x, x, op); } +void vcmpltss(const Xmm& x, const Operand& op) { vcmpltss(x, x, op); } +void vcmpneq_oqpd(const Xmm& x, const Operand& op) { vcmpneq_oqpd(x, x, op); } +void vcmpneq_oqps(const Xmm& x, const Operand& op) { vcmpneq_oqps(x, x, op); } +void vcmpneq_oqsd(const Xmm& x, const Operand& op) { vcmpneq_oqsd(x, x, op); } +void vcmpneq_oqss(const Xmm& x, const Operand& op) { vcmpneq_oqss(x, x, op); } +void vcmpneq_ospd(const Xmm& x, const Operand& op) { vcmpneq_ospd(x, x, op); } +void vcmpneq_osps(const Xmm& x, const Operand& op) { vcmpneq_osps(x, x, op); } +void vcmpneq_ossd(const Xmm& x, const Operand& op) { vcmpneq_ossd(x, x, op); } +void vcmpneq_osss(const Xmm& x, const Operand& op) { vcmpneq_osss(x, x, op); } +void vcmpneq_uspd(const Xmm& x, const Operand& op) { vcmpneq_uspd(x, x, op); } +void vcmpneq_usps(const Xmm& x, const Operand& op) { vcmpneq_usps(x, x, op); } +void vcmpneq_ussd(const Xmm& x, const Operand& op) { vcmpneq_ussd(x, x, op); } +void vcmpneq_usss(const Xmm& x, const Operand& op) { vcmpneq_usss(x, x, op); } +void vcmpneqpd(const Xmm& x, const Operand& op) { vcmpneqpd(x, x, op); } +void vcmpneqps(const Xmm& x, const Operand& op) { vcmpneqps(x, x, op); } +void vcmpneqsd(const Xmm& x, const Operand& op) { vcmpneqsd(x, x, op); } +void vcmpneqss(const Xmm& x, const Operand& op) { vcmpneqss(x, x, op); } +void vcmpnge_uqpd(const Xmm& x, const Operand& op) { vcmpnge_uqpd(x, x, op); } +void vcmpnge_uqps(const Xmm& x, const Operand& op) { vcmpnge_uqps(x, x, op); } +void vcmpnge_uqsd(const Xmm& x, const Operand& op) { vcmpnge_uqsd(x, x, op); } +void vcmpnge_uqss(const Xmm& x, const Operand& op) { vcmpnge_uqss(x, x, op); } +void vcmpngepd(const Xmm& x, const Operand& op) { vcmpngepd(x, x, op); } +void vcmpngeps(const Xmm& x, const Operand& op) { vcmpngeps(x, x, op); } +void vcmpngesd(const Xmm& x, const Operand& op) { vcmpngesd(x, x, op); } +void vcmpngess(const Xmm& x, const Operand& op) { vcmpngess(x, x, op); } +void vcmpngt_uqpd(const Xmm& x, const Operand& op) { vcmpngt_uqpd(x, x, op); } +void vcmpngt_uqps(const Xmm& x, const Operand& op) { vcmpngt_uqps(x, x, op); } +void vcmpngt_uqsd(const Xmm& x, const Operand& op) { vcmpngt_uqsd(x, x, op); } +void vcmpngt_uqss(const Xmm& x, const Operand& op) { vcmpngt_uqss(x, x, op); } +void vcmpngtpd(const Xmm& x, const Operand& op) { vcmpngtpd(x, x, op); } +void vcmpngtps(const Xmm& x, const Operand& op) { vcmpngtps(x, x, op); } +void vcmpngtsd(const Xmm& x, const Operand& op) { vcmpngtsd(x, x, op); } +void vcmpngtss(const Xmm& x, const Operand& op) { vcmpngtss(x, x, op); } +void vcmpnle_uqpd(const Xmm& x, const Operand& op) { vcmpnle_uqpd(x, x, op); } +void vcmpnle_uqps(const Xmm& x, const Operand& op) { vcmpnle_uqps(x, x, op); } +void vcmpnle_uqsd(const Xmm& x, const Operand& op) { vcmpnle_uqsd(x, x, op); } +void vcmpnle_uqss(const Xmm& x, const Operand& op) { vcmpnle_uqss(x, x, op); } +void vcmpnlepd(const Xmm& x, const Operand& op) { vcmpnlepd(x, x, op); } +void vcmpnleps(const Xmm& x, const Operand& op) { vcmpnleps(x, x, op); } +void vcmpnlesd(const Xmm& x, const Operand& op) { vcmpnlesd(x, x, op); } +void vcmpnless(const Xmm& x, const Operand& op) { vcmpnless(x, x, op); } +void vcmpnlt_uqpd(const Xmm& x, const Operand& op) { vcmpnlt_uqpd(x, x, op); } +void vcmpnlt_uqps(const Xmm& x, const Operand& op) { vcmpnlt_uqps(x, x, op); } +void vcmpnlt_uqsd(const Xmm& x, const Operand& op) { vcmpnlt_uqsd(x, x, op); } +void vcmpnlt_uqss(const Xmm& x, const Operand& op) { vcmpnlt_uqss(x, x, op); } +void vcmpnltpd(const Xmm& x, const Operand& op) { vcmpnltpd(x, x, op); } +void vcmpnltps(const Xmm& x, const Operand& op) { vcmpnltps(x, x, op); } +void vcmpnltsd(const Xmm& x, const Operand& op) { vcmpnltsd(x, x, op); } +void vcmpnltss(const Xmm& x, const Operand& op) { vcmpnltss(x, x, op); } +void vcmpord_spd(const Xmm& x, const Operand& op) { vcmpord_spd(x, x, op); } +void vcmpord_sps(const Xmm& x, const Operand& op) { vcmpord_sps(x, x, op); } +void vcmpord_ssd(const Xmm& x, const Operand& op) { vcmpord_ssd(x, x, op); } +void vcmpord_sss(const Xmm& x, const Operand& op) { vcmpord_sss(x, x, op); } +void vcmpordpd(const Xmm& x, const Operand& op) { vcmpordpd(x, x, op); } +void vcmpordps(const Xmm& x, const Operand& op) { vcmpordps(x, x, op); } +void vcmpordsd(const Xmm& x, const Operand& op) { vcmpordsd(x, x, op); } +void vcmpordss(const Xmm& x, const Operand& op) { vcmpordss(x, x, op); } +void vcmppd(const Xmm& x, const Operand& op, uint8_t imm) { vcmppd(x, x, op, imm); } +void vcmpps(const Xmm& x, const Operand& op, uint8_t imm) { vcmpps(x, x, op, imm); } +void vcmpsd(const Xmm& x, const Operand& op, uint8_t imm) { vcmpsd(x, x, op, imm); } +void vcmpss(const Xmm& x, const Operand& op, uint8_t imm) { vcmpss(x, x, op, imm); } +void vcmptrue_uspd(const Xmm& x, const Operand& op) { vcmptrue_uspd(x, x, op); } +void vcmptrue_usps(const Xmm& x, const Operand& op) { vcmptrue_usps(x, x, op); } +void vcmptrue_ussd(const Xmm& x, const Operand& op) { vcmptrue_ussd(x, x, op); } +void vcmptrue_usss(const Xmm& x, const Operand& op) { vcmptrue_usss(x, x, op); } +void vcmptruepd(const Xmm& x, const Operand& op) { vcmptruepd(x, x, op); } +void vcmptrueps(const Xmm& x, const Operand& op) { vcmptrueps(x, x, op); } +void vcmptruesd(const Xmm& x, const Operand& op) { vcmptruesd(x, x, op); } +void vcmptruess(const Xmm& x, const Operand& op) { vcmptruess(x, x, op); } +void vcmpunord_spd(const Xmm& x, const Operand& op) { vcmpunord_spd(x, x, op); } +void vcmpunord_sps(const Xmm& x, const Operand& op) { vcmpunord_sps(x, x, op); } +void vcmpunord_ssd(const Xmm& x, const Operand& op) { vcmpunord_ssd(x, x, op); } +void vcmpunord_sss(const Xmm& x, const Operand& op) { vcmpunord_sss(x, x, op); } +void vcmpunordpd(const Xmm& x, const Operand& op) { vcmpunordpd(x, x, op); } +void vcmpunordps(const Xmm& x, const Operand& op) { vcmpunordps(x, x, op); } +void vcmpunordsd(const Xmm& x, const Operand& op) { vcmpunordsd(x, x, op); } +void vcmpunordss(const Xmm& x, const Operand& op) { vcmpunordss(x, x, op); } +void vcvtsd2ss(const Xmm& x, const Operand& op) { vcvtsd2ss(x, x, op); } +void vcvtsi2sd(const Xmm& x, const Operand& op) { vcvtsi2sd(x, x, op); } +void vcvtsi2ss(const Xmm& x, const Operand& op) { vcvtsi2ss(x, x, op); } +void vcvtss2sd(const Xmm& x, const Operand& op) { vcvtss2sd(x, x, op); } +void vdppd(const Xmm& x, const Operand& op, uint8_t imm) { vdppd(x, x, op, imm); } +void vdpps(const Xmm& x, const Operand& op, uint8_t imm) { vdpps(x, x, op, imm); } +void vinsertps(const Xmm& x, const Operand& op, uint8_t imm) { vinsertps(x, x, op, imm); } +void vmpsadbw(const Xmm& x, const Operand& op, uint8_t imm) { vmpsadbw(x, x, op, imm); } +void vpackssdw(const Xmm& x, const Operand& op) { vpackssdw(x, x, op); } +void vpacksswb(const Xmm& x, const Operand& op) { vpacksswb(x, x, op); } +void vpackusdw(const Xmm& x, const Operand& op) { vpackusdw(x, x, op); } +void vpackuswb(const Xmm& x, const Operand& op) { vpackuswb(x, x, op); } +void vpaddb(const Xmm& x, const Operand& op) { vpaddb(x, x, op); } +void vpaddd(const Xmm& x, const Operand& op) { vpaddd(x, x, op); } +void vpaddq(const Xmm& x, const Operand& op) { vpaddq(x, x, op); } +void vpaddsb(const Xmm& x, const Operand& op) { vpaddsb(x, x, op); } +void vpaddsw(const Xmm& x, const Operand& op) { vpaddsw(x, x, op); } +void vpaddusb(const Xmm& x, const Operand& op) { vpaddusb(x, x, op); } +void vpaddusw(const Xmm& x, const Operand& op) { vpaddusw(x, x, op); } +void vpaddw(const Xmm& x, const Operand& op) { vpaddw(x, x, op); } +void vpalignr(const Xmm& x, const Operand& op, uint8_t imm) { vpalignr(x, x, op, imm); } +void vpand(const Xmm& x, const Operand& op) { vpand(x, x, op); } +void vpandn(const Xmm& x, const Operand& op) { vpandn(x, x, op); } +void vpavgb(const Xmm& x, const Operand& op) { vpavgb(x, x, op); } +void vpavgw(const Xmm& x, const Operand& op) { vpavgw(x, x, op); } +void vpblendd(const Xmm& x, const Operand& op, uint8_t imm) { vpblendd(x, x, op, imm); } +void vpblendvb(const Xmm& x1, const Operand& op, const Xmm& x4) { vpblendvb(x1, x1, op, x4); } +void vpblendw(const Xmm& x, const Operand& op, uint8_t imm) { vpblendw(x, x, op, imm); } +void vpclmulqdq(const Xmm& x, const Operand& op, uint8_t imm) { vpclmulqdq(x, x, op, imm); } +void vpcmpeqb(const Xmm& x, const Operand& op) { vpcmpeqb(x, x, op); } +void vpcmpeqd(const Xmm& x, const Operand& op) { vpcmpeqd(x, x, op); } +void vpcmpeqq(const Xmm& x, const Operand& op) { vpcmpeqq(x, x, op); } +void vpcmpeqw(const Xmm& x, const Operand& op) { vpcmpeqw(x, x, op); } +void vpcmpgtb(const Xmm& x, const Operand& op) { vpcmpgtb(x, x, op); } +void vpcmpgtd(const Xmm& x, const Operand& op) { vpcmpgtd(x, x, op); } +void vpcmpgtq(const Xmm& x, const Operand& op) { vpcmpgtq(x, x, op); } +void vpcmpgtw(const Xmm& x, const Operand& op) { vpcmpgtw(x, x, op); } +void vphaddd(const Xmm& x, const Operand& op) { vphaddd(x, x, op); } +void vphaddsw(const Xmm& x, const Operand& op) { vphaddsw(x, x, op); } +void vphaddw(const Xmm& x, const Operand& op) { vphaddw(x, x, op); } +void vphsubd(const Xmm& x, const Operand& op) { vphsubd(x, x, op); } +void vphsubsw(const Xmm& x, const Operand& op) { vphsubsw(x, x, op); } +void vphsubw(const Xmm& x, const Operand& op) { vphsubw(x, x, op); } +void vpinsrb(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrb(x, x, op, imm); } +void vpinsrd(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrd(x, x, op, imm); } +void vpinsrq(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrq(x, x, op, imm); } +void vpinsrw(const Xmm& x, const Operand& op, uint8_t imm) { vpinsrw(x, x, op, imm); } +void vpmaddubsw(const Xmm& x, const Operand& op) { vpmaddubsw(x, x, op); } +void vpmaddwd(const Xmm& x, const Operand& op) { vpmaddwd(x, x, op); } +void vpmaxsb(const Xmm& x, const Operand& op) { vpmaxsb(x, x, op); } +void vpmaxsd(const Xmm& x, const Operand& op) { vpmaxsd(x, x, op); } +void vpmaxsw(const Xmm& x, const Operand& op) { vpmaxsw(x, x, op); } +void vpmaxub(const Xmm& x, const Operand& op) { vpmaxub(x, x, op); } +void vpmaxud(const Xmm& x, const Operand& op) { vpmaxud(x, x, op); } +void vpmaxuw(const Xmm& x, const Operand& op) { vpmaxuw(x, x, op); } +void vpminsb(const Xmm& x, const Operand& op) { vpminsb(x, x, op); } +void vpminsd(const Xmm& x, const Operand& op) { vpminsd(x, x, op); } +void vpminsw(const Xmm& x, const Operand& op) { vpminsw(x, x, op); } +void vpminub(const Xmm& x, const Operand& op) { vpminub(x, x, op); } +void vpminud(const Xmm& x, const Operand& op) { vpminud(x, x, op); } +void vpminuw(const Xmm& x, const Operand& op) { vpminuw(x, x, op); } +void vpmuldq(const Xmm& x, const Operand& op) { vpmuldq(x, x, op); } +void vpmulhrsw(const Xmm& x, const Operand& op) { vpmulhrsw(x, x, op); } +void vpmulhuw(const Xmm& x, const Operand& op) { vpmulhuw(x, x, op); } +void vpmulhw(const Xmm& x, const Operand& op) { vpmulhw(x, x, op); } +void vpmulld(const Xmm& x, const Operand& op) { vpmulld(x, x, op); } +void vpmullw(const Xmm& x, const Operand& op) { vpmullw(x, x, op); } +void vpmuludq(const Xmm& x, const Operand& op) { vpmuludq(x, x, op); } +void vpor(const Xmm& x, const Operand& op) { vpor(x, x, op); } +void vpsadbw(const Xmm& x, const Operand& op) { vpsadbw(x, x, op); } +void vpsignb(const Xmm& x, const Operand& op) { vpsignb(x, x, op); } +void vpsignd(const Xmm& x, const Operand& op) { vpsignd(x, x, op); } +void vpsignw(const Xmm& x, const Operand& op) { vpsignw(x, x, op); } +void vpslld(const Xmm& x, const Operand& op) { vpslld(x, x, op); } +void vpslld(const Xmm& x, uint8_t imm) { vpslld(x, x, imm); } +void vpslldq(const Xmm& x, uint8_t imm) { vpslldq(x, x, imm); } +void vpsllq(const Xmm& x, const Operand& op) { vpsllq(x, x, op); } +void vpsllq(const Xmm& x, uint8_t imm) { vpsllq(x, x, imm); } +void vpsllw(const Xmm& x, const Operand& op) { vpsllw(x, x, op); } +void vpsllw(const Xmm& x, uint8_t imm) { vpsllw(x, x, imm); } +void vpsrad(const Xmm& x, const Operand& op) { vpsrad(x, x, op); } +void vpsrad(const Xmm& x, uint8_t imm) { vpsrad(x, x, imm); } +void vpsraw(const Xmm& x, const Operand& op) { vpsraw(x, x, op); } +void vpsraw(const Xmm& x, uint8_t imm) { vpsraw(x, x, imm); } +void vpsrld(const Xmm& x, const Operand& op) { vpsrld(x, x, op); } +void vpsrld(const Xmm& x, uint8_t imm) { vpsrld(x, x, imm); } +void vpsrldq(const Xmm& x, uint8_t imm) { vpsrldq(x, x, imm); } +void vpsrlq(const Xmm& x, const Operand& op) { vpsrlq(x, x, op); } +void vpsrlq(const Xmm& x, uint8_t imm) { vpsrlq(x, x, imm); } +void vpsrlw(const Xmm& x, const Operand& op) { vpsrlw(x, x, op); } +void vpsrlw(const Xmm& x, uint8_t imm) { vpsrlw(x, x, imm); } +void vpsubb(const Xmm& x, const Operand& op) { vpsubb(x, x, op); } +void vpsubd(const Xmm& x, const Operand& op) { vpsubd(x, x, op); } +void vpsubq(const Xmm& x, const Operand& op) { vpsubq(x, x, op); } +void vpsubsb(const Xmm& x, const Operand& op) { vpsubsb(x, x, op); } +void vpsubsw(const Xmm& x, const Operand& op) { vpsubsw(x, x, op); } +void vpsubusb(const Xmm& x, const Operand& op) { vpsubusb(x, x, op); } +void vpsubusw(const Xmm& x, const Operand& op) { vpsubusw(x, x, op); } +void vpsubw(const Xmm& x, const Operand& op) { vpsubw(x, x, op); } +void vpunpckhbw(const Xmm& x, const Operand& op) { vpunpckhbw(x, x, op); } +void vpunpckhdq(const Xmm& x, const Operand& op) { vpunpckhdq(x, x, op); } +void vpunpckhqdq(const Xmm& x, const Operand& op) { vpunpckhqdq(x, x, op); } +void vpunpckhwd(const Xmm& x, const Operand& op) { vpunpckhwd(x, x, op); } +void vpunpcklbw(const Xmm& x, const Operand& op) { vpunpcklbw(x, x, op); } +void vpunpckldq(const Xmm& x, const Operand& op) { vpunpckldq(x, x, op); } +void vpunpcklqdq(const Xmm& x, const Operand& op) { vpunpcklqdq(x, x, op); } +void vpunpcklwd(const Xmm& x, const Operand& op) { vpunpcklwd(x, x, op); } +void vpxor(const Xmm& x, const Operand& op) { vpxor(x, x, op); } +void vrcpss(const Xmm& x, const Operand& op) { vrcpss(x, x, op); } +void vroundsd(const Xmm& x, const Operand& op, uint8_t imm) { vroundsd(x, x, op, imm); } +void vroundss(const Xmm& x, const Operand& op, uint8_t imm) { vroundss(x, x, op, imm); } +void vrsqrtss(const Xmm& x, const Operand& op) { vrsqrtss(x, x, op); } +void vshufpd(const Xmm& x, const Operand& op, uint8_t imm) { vshufpd(x, x, op, imm); } +void vshufps(const Xmm& x, const Operand& op, uint8_t imm) { vshufps(x, x, op, imm); } +void vsqrtsd(const Xmm& x, const Operand& op) { vsqrtsd(x, x, op); } +void vsqrtss(const Xmm& x, const Operand& op) { vsqrtss(x, x, op); } +void vunpckhpd(const Xmm& x, const Operand& op) { vunpckhpd(x, x, op); } +void vunpckhps(const Xmm& x, const Operand& op) { vunpckhps(x, x, op); } +void vunpcklpd(const Xmm& x, const Operand& op) { vunpcklpd(x, x, op); } +void vunpcklps(const Xmm& x, const Operand& op) { vunpcklps(x, x, op); } +#endif +#ifdef XBYAK64 +void jecxz(std::string label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); } +void jecxz(const Label& label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); } +void jrcxz(std::string label) { opJmp(label, T_SHORT, 0xe3, 0, 0); } +void jrcxz(const Label& label) { opJmp(label, T_SHORT, 0xe3, 0, 0); } +void cdqe() { db(0x48); db(0x98); } +void cqo() { db(0x48); db(0x99); } +void cmpsq() { db(0x48); db(0xA7); } +void popfq() { db(0x9D); } +void pushfq() { db(0x9C); } +void lodsq() { db(0x48); db(0xAD); } +void movsq() { db(0x48); db(0xA5); } +void scasq() { db(0x48); db(0xAF); } +void stosq() { db(0x48); db(0xAB); } +void syscall() { db(0x0F); db(0x05); } +void sysret() { db(0x0F); db(0x07); } +void clui() { db(0xF3); db(0x0F); db(0x01); db(0xEE); } +void stui() { db(0xF3); db(0x0F); db(0x01); db(0xEF); } +void testui() { db(0xF3); db(0x0F); db(0x01); db(0xED); } +void uiret() { db(0xF3); db(0x0F); db(0x01); db(0xEC); } +void cmpxchg16b(const Address& addr) { opMR(addr, Reg64(1), T_0F, 0xC7); } +void fxrstor64(const Address& addr) { opMR(addr, Reg64(1), T_0F, 0xAE); } +void movq(const Reg64& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opSSE(mmx, reg, T_0F, 0x7E); } +void movq(const Mmx& mmx, const Reg64& reg) { if (mmx.isXMM()) db(0x66); opSSE(mmx, reg, T_0F, 0x6E); } +void movrs(const Reg& reg, const Address& addr) { opMR(addr, reg, T_0F38, reg.isBit(8) ? 0x8A : 0x8B); } +void movsxd(const Reg64& reg, const Operand& op) { if (!op.isBit(32)) XBYAK_THROW(ERR_BAD_COMBINATION) opRO(reg, op, T_ALLOW_DIFF_SIZE, 0x63); } +void pextrq(const Operand& op, const Xmm& xmm, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(Reg64(xmm.getIdx()), op, T_66 | T_0F3A, 0x16, 0, imm); } +void pinsrq(const Xmm& xmm, const Operand& op, uint8_t imm) { if (!op.isREG(64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opSSE(Reg64(xmm.getIdx()), op, T_66 | T_0F3A, 0x22, 0, imm); } +void senduipi(const Reg64& r) { opRR(Reg32(6), r.cvt32(), T_F3 | T_0F, 0xC7); } +void vcvtss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_ER_X | T_N8, 0x2D); } +void vcvttss2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W1 | T_EVEX | T_EW1 | T_SAE_X | T_N8, 0x2C); } +void vcvtsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_ER_X, 0x2D); } +void vcvttsd2si(const Reg64& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W1 | T_EVEX | T_EW1 | T_N4 | T_SAE_X, 0x2C); } +void vmovq(const Xmm& x, const Reg64& r) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x6E); } +void vmovq(const Reg64& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, Xmm(r.getIdx()), T_66 | T_0F | T_W1 | T_EVEX | T_EW1, 0x7E); } +void jmpabs(uint64_t addr) { db(0xD5); db(0x00); db(0xA1); dq(addr); } +void push2(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(6), T_APX|T_ND1|T_W0, 0xFF); } +void push2p(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(6), T_APX|T_ND1|T_W1, 0xFF); } +void pop2(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(0), T_APX|T_ND1|T_W0, 0x8F); } +void pop2p(const Reg64& r1, const Reg64& r2) { opROO(r1, r2, Reg64(0), T_APX|T_ND1|T_W1, 0x8F); } +void cmpbexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE6); } +void cmpbxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE2); } +void cmplexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xEE); } +void cmplxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xEC); } +void cmpnbexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE7); } +void cmpnbxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE3); } +void cmpnlexadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xEF); } +void cmpnlxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xED); } +void cmpnoxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE1); } +void cmpnpxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xEB); } +void cmpnsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE9); } +void cmpnzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE5); } +void cmpoxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE0); } +void cmppxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xEA); } +void cmpsxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE8); } +void cmpzxadd(const Address& addr, const Reg32e& r1, const Reg32e& r2) { opRRO(r1, r2, addr, T_APX|T_66|T_0F38, 0xE4); } +void aesdec128kl(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, T_F3|T_0F38, 0xDD, T_F3|T_MUST_EVEX, 0xDD); } +void aesdec256kl(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, T_F3|T_0F38, 0xDF, T_F3|T_MUST_EVEX, 0xDF); } +void aesdecwide128kl(const Address& addr) { opSSE_APX(xmm1, addr, T_F3|T_0F38, 0xD8, T_F3|T_MUST_EVEX, 0xD8); } +void aesdecwide256kl(const Address& addr) { opSSE_APX(xmm3, addr, T_F3|T_0F38, 0xD8, T_F3|T_MUST_EVEX, 0xD8); } +void aesenc128kl(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, T_F3|T_0F38, 0xDC, T_F3|T_MUST_EVEX, 0xDC); } +void aesenc256kl(const Xmm& x, const Address& addr) { opSSE_APX(x, addr, T_F3|T_0F38, 0xDE, T_F3|T_MUST_EVEX, 0xDE); } +void aesencwide128kl(const Address& addr) { opSSE_APX(xmm0, addr, T_F3|T_0F38, 0xD8, T_F3|T_MUST_EVEX, 0xD8); } +void aesencwide256kl(const Address& addr) { opSSE_APX(xmm2, addr, T_F3|T_0F38, 0xD8, T_F3|T_MUST_EVEX, 0xD8); } +void encodekey128(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFA, 0xDA); } +void encodekey256(const Reg32& r1, const Reg32& r2) { opEncodeKey(r1, r2, 0xFB, 0xDB); } +void rdfsbase(const Reg32e& r) { opRR(eax, r, T_F3|T_0F|T_ALLOW_DIFF_SIZE, 0xAE); } +void rdgsbase(const Reg32e& r) { opRR(ecx, r, T_F3|T_0F|T_ALLOW_DIFF_SIZE, 0xAE); } +void wrfsbase(const Reg32e& r) { opRR(edx, r, T_F3|T_0F|T_ALLOW_DIFF_SIZE, 0xAE); } +void wrgsbase(const Reg32e& r) { opRR(ebx, r, T_F3|T_0F|T_ALLOW_DIFF_SIZE, 0xAE); } +void tdpbssd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2|T_0F38|T_W0, 0x5E); } +void tdpbsud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3|T_0F38|T_W0, 0x5E); } +void tdpbusd(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66|T_0F38|T_W0, 0x5E); } +void tdpbuud(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38|T_W0, 0x5E); } +void tdpfp16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2|T_0F38|T_W0, 0x5C); } +void tdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3|T_0F38|T_W0, 0x5C); } +void tdpbf8ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_MAP5|T_W0, 0xFD); } +void tdpbhf8ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2|T_MAP5|T_W0, 0xFD); } +void tdphbf8ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3|T_MAP5|T_W0, 0xFD); } +void tdphf8ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66|T_MAP5|T_W0, 0xFD); } +void tmmultf32ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66|T_0F38|T_W0, 0x48); } +void tcmmimfp16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_66|T_0F38|T_W0, 0x6C); } +void tcmmrlfp16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38|T_W0, 0x6C); } +void tconjtcmmimfp16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38|T_W0, 0x6B); } +void ttcmmimfp16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2|T_0F38|T_W0, 0x6B); } +void ttcmmrlfp16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3|T_0F38|T_W0, 0x6B); } +void ttdpbf16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F3|T_0F38|T_W0, 0x6C); } +void ttdpfp16ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_F2|T_0F38|T_W0, 0x6C); } +void ttmmultf32ps(const Tmm& x1, const Tmm& x2, const Tmm& x3) { opVex(x1, &x3, x2, T_0F38|T_W0, 0x48); } +void tileloadd(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2|T_0F38|T_W0, 0x4B); } +void tileloaddt1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66|T_0F38|T_W0, 0x4B); } +void tileloaddrs(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_F2|T_0F38|T_W0, 0x4A); } +void tileloaddrst1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66|T_0F38|T_W0, 0x4A); } +void t2rpntlvwz0(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_0F38|T_W0, 0x6E); } +void t2rpntlvwz0t1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_0F38|T_W0, 0x6F); } +void t2rpntlvwz1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66|T_0F38|T_W0, 0x6E); } +void t2rpntlvwz1t1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66|T_0F38|T_W0, 0x6F); } +void t2rpntlvwz0rs(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_MAP5|T_W0, 0xF8); } +void t2rpntlvwz0rst1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_MAP5|T_W0, 0xF9); } +void t2rpntlvwz1rs(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66|T_MAP5|T_W0, 0xF8); } +void t2rpntlvwz1rst1(const Tmm& tm, const Address& addr) { opAMX(tm, addr, T_66|T_MAP5|T_W0, 0xF9); } +void ldtilecfg(const Address& addr) { opAMX(tmm0, addr, T_0F38|T_W0, 0x49); } +void sttilecfg(const Address& addr) { opAMX(tmm0, addr, T_66|T_0F38|T_W0, 0x49); } +void tilestored(const Address& addr, const Tmm& tm) { opAMX(tm, addr, T_F3|T_0F38|T_W0, 0x4B); } +void tilerelease() { db(0xc4); db(0xe2); db(0x78); db(0x49); db(0xc0); } +void tilezero(const Tmm& t) { opVex(t, &tmm0, tmm0, T_F2|T_0F38|T_W0, 0x49); } +void tconjtfp16(const Tmm& t1, const Tmm& t2) { opVex(t1, 0, t2, T_66|T_0F38|T_W0, 0x6B); } +void ttransposed(const Tmm& t1, const Tmm& t2) { opVex(t1, 0, t2, T_F3|T_0F38|T_W0, 0x5F); } +#else +void jcxz(std::string label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); } +void jcxz(const Label& label) { db(0x67); opJmp(label, T_SHORT, 0xe3, 0, 0); } +void jecxz(std::string label) { opJmp(label, T_SHORT, 0xe3, 0, 0); } +void jecxz(const Label& label) { opJmp(label, T_SHORT, 0xe3, 0, 0); } +void aaa() { db(0x37); } +void aad() { db(0xD5); db(0x0A); } +void aam() { db(0xD4); db(0x0A); } +void aas() { db(0x3F); } +void daa() { db(0x27); } +void das() { db(0x2F); } +void into() { db(0xCE); } +void popad() { db(0x61); } +void popfd() { db(0x9D); } +void pusha() { db(0x60); } +void pushad() { db(0x60); } +void pushfd() { db(0x9C); } +void popa() { db(0x61); } +void lds(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, T_NONE, 0xC5); } +void les(const Reg& reg, const Address& addr) { opLoadSeg(addr, reg, T_NONE, 0xC4); } +#endif +#ifndef XBYAK_NO_OP_NAMES +void and(const Operand& op1, const Operand& op2) { and_(op1, op2); } +void and(const Operand& op, uint32_t imm) { and_(op, imm); } +void or(const Operand& op1, const Operand& op2) { or_(op1, op2); } +void or(const Operand& op, uint32_t imm) { or_(op, imm); } +void xor(const Operand& op1, const Operand& op2) { xor_(op1, op2); } +void xor(const Operand& op, uint32_t imm) { xor_(op, imm); } +void not(const Operand& op) { not_(op); } +#endif +#ifndef XBYAK_DISABLE_AVX512 +void kaddb(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x4A); } +void kaddd(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W1, 0x4A); } +void kaddq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x4A); } +void kaddw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x4A); } +void kandb(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x41); } +void kandd(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W1, 0x41); } +void kandnb(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x42); } +void kandnd(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W1, 0x42); } +void kandnq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x42); } +void kandnw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x42); } +void kandq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x41); } +void kandw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x41); } +void kmovb(const Address& addr, const Opmask& k) { opKmov(k, addr, true, 8); } +void kmovb(const Opmask& k, const Operand& op) { opKmov(k, op, false, 8); } +void kmovb(const Reg32& r, const Opmask& k) { opKmov(k, r, true, 8); } +void kmovd(const Address& addr, const Opmask& k) { opKmov(k, addr, true, 32); } +void kmovd(const Opmask& k, const Operand& op) { opKmov(k, op, false, 32); } +void kmovd(const Reg32& r, const Opmask& k) { opKmov(k, r, true, 32); } +void kmovq(const Address& addr, const Opmask& k) { opKmov(k, addr, true, 64); } +void kmovq(const Opmask& k, const Operand& op) { opKmov(k, op, false, 64); } +void kmovw(const Address& addr, const Opmask& k) { opKmov(k, addr, true, 16); } +void kmovw(const Opmask& k, const Operand& op) { opKmov(k, op, false, 16); } +void kmovw(const Reg32& r, const Opmask& k) { opKmov(k, r, true, 16); } +void knotb(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W0, 0x44); } +void knotd(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W1, 0x44); } +void knotq(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W1, 0x44); } +void knotw(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W0, 0x44); } +void korb(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x45); } +void kord(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W1, 0x45); } +void korq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x45); } +void kortestb(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W0, 0x98); } +void kortestd(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W1, 0x98); } +void kortestq(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W1, 0x98); } +void kortestw(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W0, 0x98); } +void korw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x45); } +void kshiftlb(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x32, imm); } +void kshiftld(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x33, imm); } +void kshiftlq(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x33, imm); } +void kshiftlw(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x32, imm); } +void kshiftrb(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x30, imm); } +void kshiftrd(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W0, 0x31, imm); } +void kshiftrq(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x31, imm); } +void kshiftrw(const Opmask& r1, const Opmask& r2, uint8_t imm) { opVex(r1, 0, r2, T_66 | T_0F3A | T_W1, 0x30, imm); } +void ktestb(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W0, 0x99); } +void ktestd(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_66 | T_W1, 0x99); } +void ktestq(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W1, 0x99); } +void ktestw(const Opmask& r1, const Opmask& r2) { opVex(r1, 0, r2, T_0F | T_W0, 0x99); } +void kunpckbw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x4B); } +void kunpckdq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x4B); } +void kunpckwd(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x4B); } +void kxnorb(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x46); } +void kxnord(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W1, 0x46); } +void kxnorq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x46); } +void kxnorw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x46); } +void kxorb(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W0, 0x47); } +void kxord(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_66 | T_W1, 0x47); } +void kxorq(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W1, 0x47); } +void kxorw(const Opmask& r1, const Opmask& r2, const Opmask& r3) { opVex(r1, &r2, r3, T_L1 | T_0F | T_W0, 0x47); } +void v4fmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_W0 | T_YMM | T_MUST_EVEX | T_N16, 0x9A); } +void v4fmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_W0 | T_MUST_EVEX | T_N16, 0x9B); } +void v4fnmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_W0 | T_YMM | T_MUST_EVEX | T_N16, 0xAA); } +void v4fnmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_W0 | T_MUST_EVEX | T_N16, 0xAB); } +void vaddbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x58); } +void vaddph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_W0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x58); } +void vaddsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_W0 | T_MUST_EVEX | T_ER_X | T_N2, 0x58); } +void valignd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM|T_MUST_EVEX, 0x03, imm); } +void valignq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x03, imm); } +void vblendmpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x65); } +void vblendmps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0x65); } +void vbroadcastf32x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_W0 | T_N8, 0x19); } +void vbroadcastf32x4(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_W0 | T_N16, 0x1A); } +void vbroadcastf32x8(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_W0 | T_N32, 0x1B); } +void vbroadcastf64x2(const Ymm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x1A); } +void vbroadcastf64x4(const Zmm& y, const Address& addr) { opAVX_X_XM_IMM(y, addr, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x1B); } +void vbroadcasti32x2(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_W0 | T_N8, 0x59); } +void vbroadcasti32x4(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_W0 | T_N16, 0x5A); } +void vbroadcasti32x8(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_W0 | T_N32, 0x5B); } +void vbroadcasti64x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x5A); } +void vbroadcasti64x4(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x5B); } +void vcmpbf16(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opVex(k, &x, op, T_MUST_EVEX|T_F2|T_0F3A|T_W0|T_YMM|T_B16, 0xC2, imm); } +void vcmpeq_ospd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 16); } +void vcmpeq_osps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 16); } +void vcmpeq_ossd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 16); } +void vcmpeq_osss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 16); } +void vcmpeq_uqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 8); } +void vcmpeq_uqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 8); } +void vcmpeq_uqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 8); } +void vcmpeq_uqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 8); } +void vcmpeq_uspd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 24); } +void vcmpeq_usps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 24); } +void vcmpeq_ussd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 24); } +void vcmpeq_usss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 24); } +void vcmpeqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 0); } +void vcmpeqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 0); } +void vcmpeqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 0); } +void vcmpeqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 0); } +void vcmpfalse_ospd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 27); } +void vcmpfalse_osps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 27); } +void vcmpfalse_ossd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 27); } +void vcmpfalse_osss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 27); } +void vcmpfalsepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 11); } +void vcmpfalseps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 11); } +void vcmpfalsesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 11); } +void vcmpfalsess(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 11); } +void vcmpge_oqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 29); } +void vcmpge_oqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 29); } +void vcmpge_oqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 29); } +void vcmpge_oqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 29); } +void vcmpgepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 13); } +void vcmpgeps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 13); } +void vcmpgesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 13); } +void vcmpgess(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 13); } +void vcmpgt_oqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 30); } +void vcmpgt_oqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 30); } +void vcmpgt_oqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 30); } +void vcmpgt_oqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 30); } +void vcmpgtpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 14); } +void vcmpgtps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 14); } +void vcmpgtsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 14); } +void vcmpgtss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 14); } +void vcmple_oqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 18); } +void vcmple_oqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 18); } +void vcmple_oqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 18); } +void vcmple_oqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 18); } +void vcmplepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 2); } +void vcmpleps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 2); } +void vcmplesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 2); } +void vcmpless(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 2); } +void vcmplt_oqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 17); } +void vcmplt_oqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 17); } +void vcmplt_oqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 17); } +void vcmplt_oqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 17); } +void vcmpltpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 1); } +void vcmpltps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 1); } +void vcmpltsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 1); } +void vcmpltss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 1); } +void vcmpneq_oqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 12); } +void vcmpneq_oqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 12); } +void vcmpneq_oqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 12); } +void vcmpneq_oqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 12); } +void vcmpneq_ospd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 28); } +void vcmpneq_osps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 28); } +void vcmpneq_ossd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 28); } +void vcmpneq_osss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 28); } +void vcmpneq_uspd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 20); } +void vcmpneq_usps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 20); } +void vcmpneq_ussd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 20); } +void vcmpneq_usss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 20); } +void vcmpneqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 4); } +void vcmpneqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 4); } +void vcmpneqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 4); } +void vcmpneqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 4); } +void vcmpnge_uqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 25); } +void vcmpnge_uqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 25); } +void vcmpnge_uqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 25); } +void vcmpnge_uqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 25); } +void vcmpngepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 9); } +void vcmpngeps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 9); } +void vcmpngesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 9); } +void vcmpngess(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 9); } +void vcmpngt_uqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 26); } +void vcmpngt_uqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 26); } +void vcmpngt_uqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 26); } +void vcmpngt_uqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 26); } +void vcmpngtpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 10); } +void vcmpngtps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 10); } +void vcmpngtsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 10); } +void vcmpngtss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 10); } +void vcmpnle_uqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 22); } +void vcmpnle_uqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 22); } +void vcmpnle_uqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 22); } +void vcmpnle_uqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 22); } +void vcmpnlepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 6); } +void vcmpnleps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 6); } +void vcmpnlesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 6); } +void vcmpnless(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 6); } +void vcmpnlt_uqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 21); } +void vcmpnlt_uqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 21); } +void vcmpnlt_uqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 21); } +void vcmpnlt_uqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 21); } +void vcmpnltpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 5); } +void vcmpnltps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 5); } +void vcmpnltsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 5); } +void vcmpnltss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 5); } +void vcmpord_spd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 23); } +void vcmpord_sps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 23); } +void vcmpord_ssd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 23); } +void vcmpord_sss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 23); } +void vcmpordpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 7); } +void vcmpordps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 7); } +void vcmpordsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 7); } +void vcmpordss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 7); } +void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0xC2, imm); } +void vcmpph(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_0F3A|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0xC2, imm); } +void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_0F|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0xC2, imm); } +void vcmpsd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N8|T_F2|T_0F|T_EW1|T_SAE_Z|T_MUST_EVEX, 0xC2, imm); } +void vcmpsh(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N2|T_F3|T_0F3A|T_W0|T_SAE_X|T_MUST_EVEX, 0xC2, imm); } +void vcmpss(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N4|T_F3|T_0F|T_W0|T_SAE_Z|T_MUST_EVEX, 0xC2, imm); } +void vcmptrue_uspd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 31); } +void vcmptrue_usps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 31); } +void vcmptrue_ussd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 31); } +void vcmptrue_usss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 31); } +void vcmptruepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 15); } +void vcmptrueps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 15); } +void vcmptruesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 15); } +void vcmptruess(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 15); } +void vcmpunord_spd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 19); } +void vcmpunord_sps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 19); } +void vcmpunord_ssd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 19); } +void vcmpunord_sss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 19); } +void vcmpunordpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 3); } +void vcmpunordps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 3); } +void vcmpunordsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 3); } +void vcmpunordss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 3); } +void vcomisbf16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_66|T_MAP5|T_W0|T_MUST_EVEX, 0x2F); } +void vcomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_MAP5|T_W0|T_SAE_X|T_MUST_EVEX, 0x2F); } +void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x8A); } +void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4|T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX, 0x8A); } +void vcomxsd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8|T_F2|T_0F|T_EW1|T_SAE_X|T_MUST_EVEX, 0x2F); } +void vcomxsh(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_F3|T_MAP5|T_W0|T_SAE_X|T_MUST_EVEX, 0x2F); } +void vcomxss(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4|T_F3|T_0F|T_W0|T_SAE_X|T_MUST_EVEX, 0x2F); } +void vcvt2ph2bf8(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N1|T_F2|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x74); } +void vcvt2ph2bf8s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N1|T_F2|T_MAP5|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x74); } +void vcvt2ph2hf8(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N1|T_F2|T_MAP5|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x18); } +void vcvt2ph2hf8s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N1|T_F2|T_MAP5|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x1B); } +void vcvt2ps2phx(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x67); } +void vcvtbf162ibs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_MAP5|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x69); } +void vcvtbf162iubs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_MAP5|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x6B); } +void vcvtbiasph2bf8(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt6(x1, x2, op, T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x74); } +void vcvtbiasph2bf8s(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt6(x1, x2, op, T_MAP5|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x74); } +void vcvtbiasph2hf8(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt6(x1, x2, op, T_MAP5|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x18); } +void vcvtbiasph2hf8s(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt6(x1, x2, op, T_MAP5|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x1B); } +void vcvtdq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16|T_N_VL|T_MAP5|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x5B); } +void vcvthf82ph(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_MUST_EVEX | T_F2 | T_MAP5 | T_W0 | T_YMM | T_N1, 0x1E); } +void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F38|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x72); } +void vcvtpd2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16|T_N_VL|T_66|T_MAP5|T_EW1|T_ER_Z|T_MUST_EVEX|T_B64, 0x5A); } +void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x7B); } +void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x79); } +void vcvtpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x79); } +void vcvtph2bf8(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x74); } +void vcvtph2bf8s(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3|T_MAP5|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x74); } +void vcvtph2dq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_MAP5|T_W0|T_YMM|T_ER_Y|T_MUST_EVEX|T_B16, 0x5B); } +void vcvtph2hf8(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3|T_MAP5|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x18); } +void vcvtph2hf8s(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3|T_MAP5|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x1B); } +void vcvtph2ibs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x69); } +void vcvtph2iubs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x6B); } +void vcvtph2pd(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4|T_N_VL|T_MAP5|T_W0|T_YMM|T_SAE_X|T_MUST_EVEX|T_B16, 0x5A); } +void vcvtph2psx(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_MAP6|T_W0|T_YMM|T_SAE_Y|T_MUST_EVEX|T_B16, 0x13); } +void vcvtph2qq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4|T_N_VL|T_66|T_MAP5|T_W0|T_YMM|T_ER_X|T_MUST_EVEX|T_B16, 0x7B); } +void vcvtph2udq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_MAP5|T_W0|T_YMM|T_ER_Y|T_MUST_EVEX|T_B16, 0x79); } +void vcvtph2uqq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4|T_N_VL|T_66|T_MAP5|T_W0|T_YMM|T_ER_X|T_MUST_EVEX|T_B16, 0x79); } +void vcvtph2uw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x7D); } +void vcvtph2w(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x7D); } +void vcvtps2ibs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x69); } +void vcvtps2iubs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x6B); } +void vcvtps2phx(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16|T_N_VL|T_66|T_MAP5|T_W0|T_ER_Z|T_MUST_EVEX|T_B32, 0x1D); } +void vcvtps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_0F|T_W0|T_YMM|T_ER_Y|T_MUST_EVEX|T_B32, 0x7B); } +void vcvtps2udq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_0F|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x79); } +void vcvtps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_0F|T_W0|T_YMM|T_ER_Y|T_MUST_EVEX|T_B32, 0x79); } +void vcvtqq2pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3|T_0F|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0xE6); } +void vcvtqq2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16|T_N_VL|T_MAP5|T_EW1|T_ER_Z|T_MUST_EVEX|T_B64, 0x5B); } +void vcvtqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x5B); } +void vcvtsd2sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_F2|T_MAP5|T_EW1|T_ER_X|T_MUST_EVEX, 0x5A); } +void vcvtsd2usi(const Reg32e& r, const Operand& op) { uint64_t type = (T_N8|T_F2|T_0F|T_ER_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_W0); opVex(r, &xm0, op, type, 0x79); } +void vcvtsh2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_F3|T_MAP5|T_W0|T_SAE_X|T_MUST_EVEX, 0x5A); } +void vcvtsh2si(const Reg32e& r, const Operand& op) { uint64_t type = (T_N2|T_F3|T_MAP5|T_ER_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_W0); opVex(r, &xm0, op, type, 0x2D); } +void vcvtsh2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_MAP6|T_W0|T_SAE_X|T_MUST_EVEX, 0x13); } +void vcvtsh2usi(const Reg32e& r, const Operand& op) { uint64_t type = (T_N2|T_F3|T_MAP5|T_ER_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_W0); opVex(r, &xm0, op, type, 0x79); } +void vcvtsi2sh(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) uint64_t type = (T_F3|T_MAP5|T_ER_R|T_MUST_EVEX|T_M_K) | (op.isBit(32) ? (T_W0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x2A); } +void vcvtss2sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_MAP5|T_W0|T_ER_X|T_MUST_EVEX, 0x1D); } +void vcvtss2usi(const Reg32e& r, const Operand& op) { uint64_t type = (T_N4|T_F3|T_0F|T_ER_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_W0); opVex(r, &xm0, op, type, 0x79); } +void vcvttbf162ibs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_MAP5|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x68); } +void vcvttbf162iubs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_MAP5|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x6A); } +void vcvttpd2dqs(const Xmm& x, const Operand& op) { opCvt2(x, op, T_MAP5|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x6D); } +void vcvttpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x7A); } +void vcvttpd2qqs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x6D); } +void vcvttpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x78); } +void vcvttpd2udqs(const Xmm& x, const Operand& op) { opCvt2(x, op, T_MAP5|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x6C); } +void vcvttpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x78); } +void vcvttpd2uqqs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x6C); } +void vcvttph2dq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_F3|T_MAP5|T_W0|T_YMM|T_SAE_Y|T_MUST_EVEX|T_B16, 0x5B); } +void vcvttph2ibs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x68); } +void vcvttph2iubs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x6A); } +void vcvttph2qq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4|T_N_VL|T_66|T_MAP5|T_W0|T_YMM|T_SAE_X|T_MUST_EVEX|T_B16, 0x7A); } +void vcvttph2udq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_MAP5|T_W0|T_YMM|T_SAE_Y|T_MUST_EVEX|T_B16, 0x78); } +void vcvttph2uqq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4|T_N_VL|T_66|T_MAP5|T_W0|T_YMM|T_SAE_X|T_MUST_EVEX|T_B16, 0x78); } +void vcvttph2uw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x7C); } +void vcvttph2w(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x7C); } +void vcvttps2dqs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x6D); } +void vcvttps2ibs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x68); } +void vcvttps2iubs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x6A); } +void vcvttps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_0F|T_W0|T_YMM|T_SAE_Y|T_MUST_EVEX|T_B32, 0x7A); } +void vcvttps2qqs(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_MAP5|T_W0|T_YMM|T_SAE_Y|T_MUST_EVEX|T_B32, 0x6D); } +void vcvttps2udq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_0F|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x78); } +void vcvttps2udqs(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x6C); } +void vcvttps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_0F|T_W0|T_YMM|T_SAE_Y|T_MUST_EVEX|T_B32, 0x78); } +void vcvttps2uqqs(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_66|T_MAP5|T_W0|T_YMM|T_SAE_Y|T_MUST_EVEX|T_B32, 0x6C); } +void vcvttsd2sis(const Reg32e& r, const Operand& op) { uint64_t type = (T_N8|T_F2|T_MAP5|T_SAE_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_W0); opVex(r, &xm0, op, type, 0x6D); } +void vcvttsd2usi(const Reg32e& r, const Operand& op) { uint64_t type = (T_N8|T_F2|T_0F|T_SAE_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_W0); opVex(r, &xm0, op, type, 0x78); } +void vcvttsd2usis(const Reg32e& r, const Operand& op) { uint64_t type = (T_N8|T_F2|T_MAP5|T_SAE_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_W0); opVex(r, &xm0, op, type, 0x6C); } +void vcvttsh2si(const Reg32e& r, const Operand& op) { uint64_t type = (T_N2|T_F3|T_MAP5|T_SAE_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_W0); opVex(r, &xm0, op, type, 0x2C); } +void vcvttsh2usi(const Reg32e& r, const Operand& op) { uint64_t type = (T_N2|T_F3|T_MAP5|T_SAE_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_W0); opVex(r, &xm0, op, type, 0x78); } +void vcvttss2sis(const Reg32e& r, const Operand& op) { uint64_t type = (T_N4|T_F3|T_MAP5|T_SAE_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_W0); opVex(r, &xm0, op, type, 0x6D); } +void vcvttss2usi(const Reg32e& r, const Operand& op) { uint64_t type = (T_N4|T_F3|T_0F|T_SAE_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_W0); opVex(r, &xm0, op, type, 0x78); } +void vcvttss2usis(const Reg32e& r, const Operand& op) { uint64_t type = (T_N4|T_F3|T_MAP5|T_SAE_X|T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_W0); opVex(r, &xm0, op, type, 0x6C); } +void vcvtudq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8|T_N_VL|T_F3|T_0F|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0x7A); } +void vcvtudq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16|T_N_VL|T_F2|T_MAP5|T_W0|T_ER_Z|T_MUST_EVEX|T_B32, 0x7A); } +void vcvtudq2ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_0F|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x7A); } +void vcvtuqq2pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3|T_0F|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x7A); } +void vcvtuqq2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16|T_N_VL|T_F2|T_MAP5|T_EW1|T_ER_Z|T_MUST_EVEX|T_B64, 0x7A); } +void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2|T_0F|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x7A); } +void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_N4, 0x7B); } +void vcvtusi2sh(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) uint64_t type = (T_F3|T_MAP5|T_ER_R|T_MUST_EVEX|T_M_K) | (op.isBit(32) ? (T_W0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x7B); } +void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_N4, 0x7B); } +void vcvtuw2ph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_MAP5|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x7D); } +void vcvtw2ph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3|T_MAP5|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x7D); } +void vdbpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM|T_MUST_EVEX, 0x42, imm); } +void vdivbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x5E); } +void vdivph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_W0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5E); } +void vdivsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_W0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5E); } +void vdpbf16ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_0F38|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x52); } +void vdpphps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_0F38|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x52); } +void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); } +void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_W0 | T_B32 | T_SAE_Z, 0xC8); } +void vexpandpd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x88); } +void vexpandps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4|T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX, 0x88); } +void vextractf32x4(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16|T_66|T_0F3A|T_W0|T_YMM|T_MUST_EVEX, 0x19, imm); } +void vextractf32x8(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32|T_66|T_0F3A|T_W0|T_YMM|T_MUST_EVEX, 0x1B, imm); } +void vextractf64x2(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16|T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x19, imm); } +void vextractf64x4(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32|T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x1B, imm); } +void vextracti32x4(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16|T_66|T_0F3A|T_W0|T_YMM|T_MUST_EVEX, 0x39, imm); } +void vextracti32x8(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32|T_66|T_0F3A|T_W0|T_YMM|T_MUST_EVEX, 0x3B, imm); } +void vextracti64x2(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16|T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x39, imm); } +void vextracti64x4(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32|T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x3B, imm); } +void vfcmaddcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x56); } +void vfcmulcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0xD6); } +void vfixupimmpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x54, imm); } +void vfixupimmps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x54, imm); } +void vfixupimmsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F3A|T_EW1|T_SAE_Z|T_MUST_EVEX, 0x55, imm); } +void vfixupimmss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F3A|T_W0|T_SAE_Z|T_MUST_EVEX, 0x55, imm); } +void vfmadd132bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x98); } +void vfmadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x98); } +void vfmadd132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_W0|T_ER_X|T_MUST_EVEX, 0x99); } +void vfmadd213bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0xA8); } +void vfmadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xA8); } +void vfmadd213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_W0|T_ER_X|T_MUST_EVEX, 0xA9); } +void vfmadd231bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0xB8); } +void vfmadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xB8); } +void vfmadd231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_W0|T_ER_X|T_MUST_EVEX, 0xB9); } +void vfmaddcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x56); } +void vfmaddsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x96); } +void vfmaddsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xA6); } +void vfmaddsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xB6); } +void vfmsub132bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x9A); } +void vfmsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x9A); } +void vfmsub132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_W0|T_ER_X|T_MUST_EVEX, 0x9B); } +void vfmsub213bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0xAA); } +void vfmsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xAA); } +void vfmsub213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_W0|T_ER_X|T_MUST_EVEX, 0xAB); } +void vfmsub231bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0xBA); } +void vfmsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xBA); } +void vfmsub231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_W0|T_ER_X|T_MUST_EVEX, 0xBB); } +void vfmsubadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x97); } +void vfmsubadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xA7); } +void vfmsubadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xB7); } +void vfmulcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0xD6); } +void vfnmadd132bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x9C); } +void vfnmadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x9C); } +void vfnmadd132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_W0|T_ER_X|T_MUST_EVEX, 0x9D); } +void vfnmadd213bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0xAC); } +void vfnmadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xAC); } +void vfnmadd213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_W0|T_ER_X|T_MUST_EVEX, 0xAD); } +void vfnmadd231bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0xBC); } +void vfnmadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xBC); } +void vfnmadd231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_W0|T_ER_X|T_MUST_EVEX, 0xBD); } +void vfnmsub132bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x9E); } +void vfnmsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x9E); } +void vfnmsub132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_W0|T_ER_X|T_MUST_EVEX, 0x9F); } +void vfnmsub213bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0xAE); } +void vfnmsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xAE); } +void vfnmsub213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_W0|T_ER_X|T_MUST_EVEX, 0xAF); } +void vfnmsub231bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0xBE); } +void vfnmsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0xBE); } +void vfnmsub231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_W0|T_ER_X|T_MUST_EVEX, 0xBF); } +void vfpclassbf16(const Opmask& k, const Operand& op, uint8_t imm) { opVex(k.changeBit(op.getBit()), 0, op, T_MUST_EVEX|T_F2|T_0F3A|T_W0|T_YMM|T_B16, 0x66, imm); } +void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); } +void vfpclassph(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_0F3A | T_MUST_EVEX | T_YMM | T_W0 | T_B16, 0x66, imm); } +void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_W0 | T_B32, 0x66, imm); } +void vfpclasssd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); } +void vfpclasssh(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_0F3A | T_MUST_EVEX | T_W0 | T_N2, 0x67, imm); } +void vfpclassss(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_W0 | T_N4, 0x67, imm); } +void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_VSIB, 0x92, 1); } +void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4|T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_VSIB, 0x92, 0); } +void vgatherpf0dpd(const Address& addr) { opGatherFetch(addr, zm1, T_N8|T_66|T_0F38|T_EW1|T_MUST_EVEX|T_M_K|T_VSIB, 0xC6, Operand::YMM); } +void vgatherpf0dps(const Address& addr) { opGatherFetch(addr, zm1, T_N4|T_66|T_0F38|T_W0|T_MUST_EVEX|T_M_K|T_VSIB, 0xC6, Operand::ZMM); } +void vgatherpf0qpd(const Address& addr) { opGatherFetch(addr, zm1, T_N8|T_66|T_0F38|T_EW1|T_MUST_EVEX|T_M_K|T_VSIB, 0xC7, Operand::ZMM); } +void vgatherpf0qps(const Address& addr) { opGatherFetch(addr, zm1, T_N4|T_66|T_0F38|T_W0|T_MUST_EVEX|T_M_K|T_VSIB, 0xC7, Operand::ZMM); } +void vgatherpf1dpd(const Address& addr) { opGatherFetch(addr, zm2, T_N8|T_66|T_0F38|T_EW1|T_MUST_EVEX|T_M_K|T_VSIB, 0xC6, Operand::YMM); } +void vgatherpf1dps(const Address& addr) { opGatherFetch(addr, zm2, T_N4|T_66|T_0F38|T_W0|T_MUST_EVEX|T_M_K|T_VSIB, 0xC6, Operand::ZMM); } +void vgatherpf1qpd(const Address& addr) { opGatherFetch(addr, zm2, T_N8|T_66|T_0F38|T_EW1|T_MUST_EVEX|T_M_K|T_VSIB, 0xC7, Operand::ZMM); } +void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_N4|T_66|T_0F38|T_W0|T_MUST_EVEX|T_M_K|T_VSIB, 0xC7, Operand::ZMM); } +void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_VSIB, 0x93, 0); } +void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4|T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_VSIB, 0x93, 2); } +void vgetexpbf16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP6|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x42); } +void vgetexppd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x42); } +void vgetexpph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP6|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x42); } +void vgetexpps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x42); } +void vgetexpsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_EW1|T_SAE_X|T_MUST_EVEX, 0x43); } +void vgetexpsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_W0|T_SAE_X|T_MUST_EVEX, 0x43); } +void vgetexpss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_SAE_X|T_MUST_EVEX, 0x43); } +void vgetmantbf16(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_F2|T_0F3A|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x26, imm); } +void vgetmantpd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x26, imm); } +void vgetmantph(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_0F3A|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x26, imm); } +void vgetmantps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x26, imm); } +void vgetmantsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F3A|T_EW1|T_SAE_X|T_MUST_EVEX, 0x27, imm); } +void vgetmantsh(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N2|T_0F3A|T_W0|T_SAE_X|T_MUST_EVEX, 0x27, imm); } +void vgetmantss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F3A|T_W0|T_SAE_X|T_MUST_EVEX, 0x27, imm); } +void vinsertf32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16|T_66|T_0F3A|T_W0|T_YMM|T_MUST_EVEX, 0x18, imm); } +void vinsertf32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32|T_66|T_0F3A|T_W0|T_YMM|T_MUST_EVEX, 0x1A, imm); } +void vinsertf64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16|T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x18, imm); } +void vinsertf64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32|T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x1A, imm); } +void vinserti32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16|T_66|T_0F3A|T_W0|T_YMM|T_MUST_EVEX, 0x38, imm); } +void vinserti32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32|T_66|T_0F3A|T_W0|T_YMM|T_MUST_EVEX, 0x3A, imm); } +void vinserti64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16|T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x38, imm); } +void vinserti64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32|T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x3A, imm); } +void vmaxbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x5F); } +void vmaxph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_W0 | T_YMM | T_MUST_EVEX | T_SAE_Z | T_B16, 0x5F); } +void vmaxsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_W0 | T_MUST_EVEX | T_SAE_X | T_N2, 0x5F); } +void vminbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x5D); } +void vminmaxbf16(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_F2|T_0F3A|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x52, imm); } +void vminmaxpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x52, imm); } +void vminmaxph(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_0F3A|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x52, imm); } +void vminmaxps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x52, imm); } +void vminmaxsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F3A|T_EW1|T_SAE_X|T_MUST_EVEX, 0x53, imm); } +void vminmaxsh(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N2|T_0F3A|T_W0|T_SAE_X|T_MUST_EVEX, 0x53, imm); } +void vminmaxss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F3A|T_W0|T_SAE_X|T_MUST_EVEX, 0x53, imm); } +void vminph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_W0 | T_YMM | T_MUST_EVEX | T_SAE_Z | T_B16, 0x5D); } +void vminsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_W0 | T_MUST_EVEX | T_SAE_X | T_N2, 0x5D); } +void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66|T_0F|T_W0|T_YMM|T_MUST_EVEX|T_M_K, 0x7F); } +void vmovdqa32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F|T_W0|T_YMM|T_MUST_EVEX, 0x6F); } +void vmovdqa64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66|T_0F|T_EW1|T_YMM|T_MUST_EVEX|T_M_K, 0x7F); } +void vmovdqa64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F|T_EW1|T_YMM|T_MUST_EVEX, 0x6F); } +void vmovdqu16(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2|T_0F|T_EW1|T_YMM|T_MUST_EVEX|T_M_K, 0x7F); } +void vmovdqu16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_0F|T_EW1|T_YMM|T_MUST_EVEX, 0x6F); } +void vmovdqu32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3|T_0F|T_W0|T_YMM|T_MUST_EVEX|T_M_K, 0x7F); } +void vmovdqu32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3|T_0F|T_W0|T_YMM|T_MUST_EVEX, 0x6F); } +void vmovdqu64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3|T_0F|T_EW1|T_YMM|T_MUST_EVEX|T_M_K, 0x7F); } +void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3|T_0F|T_EW1|T_YMM|T_MUST_EVEX, 0x6F); } +void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2|T_0F|T_W0|T_YMM|T_MUST_EVEX|T_M_K, 0x7F); } +void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2|T_0F|T_W0|T_YMM|T_MUST_EVEX, 0x6F); } +void vmovsh(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2|T_F3|T_MAP5|T_W0|T_MUST_EVEX|T_M_K, 0x11); } +void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N2|T_F3|T_MAP5|T_W0|T_MUST_EVEX, 0x10); } +void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, T_N2|T_F3|T_MAP5|T_W0|T_MUST_EVEX, 0x10); } +void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F3A|T_YMM, 0x42, encoding, imm, T_66|T_YMM, T_F3|T_0F3A|T_B32, 1); } +void vmulbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x59); } +void vmulph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_W0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x59); } +void vmulsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_W0 | T_MUST_EVEX | T_ER_X | T_N2, 0x59); } +void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_W0 | T_B32, 0x68); } +void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); } +void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_W0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); } +void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_W0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); } +void vpabsq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_B64 | T_YMM, 0x1F); } +void vpandd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0xDB); } +void vpandnd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0xDF); } +void vpandnq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0xDF); } +void vpandq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0xDB); } +void vpblendmb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX, 0x66); } +void vpblendmd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0x64); } +void vpblendmq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x64); } +void vpblendmw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x66); } +void vpbroadcastb(const Xmm& x, const Reg8& r) { opVex(x, 0, r, T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX, 0x7A); } +void vpbroadcastd(const Xmm& x, const Reg32& r) { opVex(x, 0, r, T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX, 0x7C); } +void vpbroadcastmb2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1, 0x2A); } +void vpbroadcastmw2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_W0, 0x3A); } +void vpbroadcastw(const Xmm& x, const Reg16& r) { opVex(x, 0, r, T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX, 0x7B); } +void vpcmpb(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_W0|T_YMM|T_MUST_EVEX, 0x3F, imm); } +void vpcmpd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0x1F, imm); } +void vpcmpeqb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F|T_YMM|T_MUST_EVEX, 0x74); } +void vpcmpeqd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F|T_YMM|T_MUST_EVEX|T_B32, 0x76); } +void vpcmpeqq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x29); } +void vpcmpeqw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F|T_YMM|T_MUST_EVEX, 0x75); } +void vpcmpgtb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F|T_YMM|T_MUST_EVEX, 0x64); } +void vpcmpgtd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0x66); } +void vpcmpgtq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x37); } +void vpcmpgtw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F|T_YMM|T_MUST_EVEX, 0x65); } +void vpcmpq(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x1F, imm); } +void vpcmpub(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_W0|T_YMM|T_MUST_EVEX, 0x3E, imm); } +void vpcmpud(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0x1E, imm); } +void vpcmpuq(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x1E, imm); } +void vpcmpuw(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x3E, imm); } +void vpcmpw(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX, 0x3F, imm); } +void vpcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1|T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX, 0x63); } +void vpcompressd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4|T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX, 0x8B); } +void vpcompressq(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x8B); } +void vpcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x63); } +void vpconflictd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0xC4); } +void vpconflictq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0xC4); } +void vpdpbssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_F2|T_0F38|T_W0|T_YMM, 0x50, encoding, NONE, T_NONE, T_B32, 1); } +void vpdpbssds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_F2|T_0F38|T_W0|T_YMM, 0x51, encoding, NONE, T_NONE, T_B32, 1); } +void vpdpbsud(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_F3|T_0F38|T_W0|T_YMM, 0x50, encoding, NONE, T_NONE, T_B32, 1); } +void vpdpbsuds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_F3|T_0F38|T_W0|T_YMM, 0x51, encoding, NONE, T_NONE, T_B32, 1); } +void vpdpbuud(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F38|T_W0|T_YMM, 0x50, encoding, NONE, T_NONE, T_B32, 1); } +void vpdpbuuds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F38|T_W0|T_YMM, 0x51, encoding, NONE, T_NONE, T_B32, 1); } +void vpdpwsud(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_F3|T_0F38|T_W0|T_YMM, 0xD2, encoding, NONE, T_NONE, T_B32, 1); } +void vpdpwsuds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_F3|T_0F38|T_W0|T_YMM, 0xD3, encoding, NONE, T_NONE, T_B32, 1); } +void vpdpwusd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_W0|T_YMM, 0xD2, encoding, NONE, T_NONE, T_B32, 1); } +void vpdpwusds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_66|T_0F38|T_W0|T_YMM, 0xD3, encoding, NONE, T_NONE, T_B32, 1); } +void vpdpwuud(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F38|T_W0|T_YMM, 0xD2, encoding, NONE, T_NONE, T_B32, 1); } +void vpdpwuuds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opEncoding(x1, x2, op, T_0F38|T_W0|T_YMM, 0xD3, encoding, NONE, T_NONE, T_B32, 1); } +void vpermb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX, 0x8D); } +void vpermi2b(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX, 0x75); } +void vpermi2d(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0x76); } +void vpermi2pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x77); } +void vpermi2ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0x77); } +void vpermi2q(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x76); } +void vpermi2w(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x75); } +void vpermt2b(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX, 0x7D); } +void vpermt2d(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0x7E); } +void vpermt2pd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x7F); } +void vpermt2ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0x7F); } +void vpermt2q(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x7E); } +void vpermt2w(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x7D); } +void vpermw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x8D); } +void vpexpandb(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N1|T_66|T_0F38|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX, 0x62); } +void vpexpandd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4|T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX, 0x89); } +void vpexpandq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x89); } +void vpexpandw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_66|T_0F38|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX, 0x62); } +void vpgatherdd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4|T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_VSIB, 0x90, 0); } +void vpgatherdq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_VSIB, 0x90, 1); } +void vpgatherqd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4|T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_VSIB, 0x91, 2); } +void vpgatherqq(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_VSIB, 0x91, 0); } +void vplzcntd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0x44); } +void vplzcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x44); } +void vpmadd52huq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0xB5); } +void vpmadd52luq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0xB4); } +void vpmaxsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x3D); } +void vpmaxuq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x3F); } +void vpminsq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x39); } +void vpminuq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x3B); } +void vpmovb2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_W0, 0x29); } +void vpmovd2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_W0, 0x39); } +void vpmovdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4|T_N_VL|T_F3|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_M_K, 0x31, false); } +void vpmovdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8|T_N_VL|T_F3|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_M_K, 0x33, true); } +void vpmovm2b(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_W0, 0x28); } +void vpmovm2d(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_W0, 0x38); } +void vpmovm2q(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x38); } +void vpmovm2w(const Xmm& x, const Opmask& k) { opVex(x, 0, k, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x28); } +void vpmovq2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x39); } +void vpmovqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2|T_N_VL|T_F3|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_M_K, 0x32, false); } +void vpmovqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8|T_N_VL|T_F3|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_M_K, 0x35, true); } +void vpmovqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4|T_N_VL|T_F3|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_M_K, 0x34, false); } +void vpmovsdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4|T_N_VL|T_F3|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_M_K, 0x21, false); } +void vpmovsdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8|T_N_VL|T_F3|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_M_K, 0x23, true); } +void vpmovsqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2|T_N_VL|T_F3|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_M_K, 0x22, false); } +void vpmovsqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8|T_N_VL|T_F3|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_M_K, 0x25, true); } +void vpmovsqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4|T_N_VL|T_F3|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_M_K, 0x24, false); } +void vpmovswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8|T_N_VL|T_F3|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_M_K, 0x20, true); } +void vpmovusdb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4|T_N_VL|T_F3|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_M_K, 0x11, false); } +void vpmovusdw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8|T_N_VL|T_F3|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_M_K, 0x13, true); } +void vpmovusqb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N2|T_N_VL|T_F3|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_M_K, 0x12, false); } +void vpmovusqd(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8|T_N_VL|T_F3|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_M_K, 0x15, true); } +void vpmovusqw(const Operand& op, const Xmm& x) { opVmov(op, x, T_N4|T_N_VL|T_F3|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_M_K, 0x14, false); } +void vpmovuswb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8|T_N_VL|T_F3|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_M_K, 0x10, true); } +void vpmovw2m(const Opmask& k, const Xmm& x) { opVex(k, 0, x, T_F3 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1, 0x29); } +void vpmovwb(const Operand& op, const Xmm& x) { opVmov(op, x, T_N8|T_N_VL|T_F3|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_M_K, 0x30, true); } +void vpmullq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x40); } +void vpmultishiftqb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x83); } +void vpopcntb(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX, 0x54); } +void vpopcntd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x55); } +void vpopcntq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x55); } +void vpopcntw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX, 0x54); } +void vpord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0xEB); } +void vporq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0xEB); } +void vprold(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 1), x, op, T_66|T_0F|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0x72, imm); } +void vprolq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 1), x, op, T_66|T_0F|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x72, imm); } +void vprolvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0x15); } +void vprolvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x15); } +void vprord(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66|T_0F|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0x72, imm); } +void vprorq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 0), x, op, T_66|T_0F|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x72, imm); } +void vprorvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0x14); } +void vprorvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x14); } +void vpscatterdd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4|T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_M_K|T_VSIB, 0xA0, 0); } +void vpscatterdq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_M_K|T_VSIB, 0xA0, 1); } +void vpscatterqd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4|T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_M_K|T_VSIB, 0xA1, 2); } +void vpscatterqq(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_M_K|T_VSIB, 0xA1, 0); } +void vpshldd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x71, imm); } +void vpshldq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x71, imm); } +void vpshldvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x71); } +void vpshldvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x71); } +void vpshldvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX, 0x70); } +void vpshldw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX, 0x70, imm); } +void vpshrdd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x73, imm); } +void vpshrdq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x73, imm); } +void vpshrdvd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x73); } +void vpshrdvq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x73); } +void vpshrdvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX, 0x72); } +void vpshrdw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX, 0x72, imm); } +void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_W0 | T_YMM | T_MUST_EVEX, 0x8F); } +void vpsllvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x12); } +void vpsraq(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_X_XM(Xmm(x.getKind(), 4), x, op, T_66|T_0F|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x72, imm); } +void vpsraq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N16|T_66|T_0F|T_EW1|T_YMM|T_MUST_EVEX, 0xE2); } +void vpsravq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x46); } +void vpsravw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x11); } +void vpsrlvw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x10); } +void vpternlogd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0x25, imm); } +void vpternlogq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x25, imm); } +void vptestmb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX, 0x26); } +void vptestmd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0x27); } +void vptestmq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x27); } +void vptestmw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x26); } +void vptestnmb(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_F3|T_0F38|T_W0|T_YMM|T_MUST_EVEX, 0x26); } +void vptestnmd(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_F3|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0x27); } +void vptestnmq(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_F3|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x27); } +void vptestnmw(const Opmask& k, const Xmm& x, const Operand& op) { opAVX_K_X_XM(k, x, op, T_F3|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x26); } +void vpxord(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0xEF); } +void vpxorq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0xEF); } +void vrangepd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x50, imm); } +void vrangeps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F3A|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x50, imm); } +void vrangesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F3A|T_EW1|T_SAE_X|T_MUST_EVEX, 0x51, imm); } +void vrangess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F3A|T_W0|T_SAE_X|T_MUST_EVEX, 0x51, imm); } +void vrcp14pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x4C); } +void vrcp14ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0x4C); } +void vrcp14sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_EW1|T_MUST_EVEX, 0x4D); } +void vrcp14ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_MUST_EVEX, 0x4D); } +void vrcp28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xCA); } +void vrcp28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_W0 | T_B32 | T_SAE_Z, 0xCA); } +void vrcp28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_EW1|T_SAE_X|T_MUST_EVEX, 0xCB); } +void vrcp28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_SAE_X|T_MUST_EVEX, 0xCB); } +void vrcpbf16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP6|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x4C); } +void vrcpph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP6|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x4C); } +void vrcpsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_W0|T_MUST_EVEX, 0x4D); } +void vreducebf16(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_F2|T_0F3A|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x56, imm); } +void vreducepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x56, imm); } +void vreduceph(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_0F3A|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x56, imm); } +void vreduceps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x56, imm); } +void vreducesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F3A|T_EW1|T_SAE_X|T_MUST_EVEX, 0x57, imm); } +void vreducesh(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N2|T_0F3A|T_W0|T_SAE_X|T_MUST_EVEX, 0x57, imm); } +void vreducess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F3A|T_W0|T_SAE_X|T_MUST_EVEX, 0x57, imm); } +void vrndscalebf16(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_F2|T_0F3A|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x08, imm); } +void vrndscalepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_EW1|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B64, 0x09, imm); } +void vrndscaleph(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_0F3A|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B16, 0x08, imm); } +void vrndscaleps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66|T_0F3A|T_W0|T_YMM|T_SAE_Z|T_MUST_EVEX|T_B32, 0x08, imm); } +void vrndscalesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F3A|T_EW1|T_SAE_X|T_MUST_EVEX, 0x0B, imm); } +void vrndscalesh(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N2|T_0F3A|T_W0|T_SAE_X|T_MUST_EVEX, 0x0A, imm); } +void vrndscaless(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F3A|T_W0|T_SAE_X|T_MUST_EVEX, 0x0A, imm); } +void vrsqrt14pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_B64, 0x4E); } +void vrsqrt14ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_B32, 0x4E); } +void vrsqrt14sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x4F); } +void vrsqrt14ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX, 0x4F); } +void vrsqrt28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xCC); } +void vrsqrt28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_W0 | T_B32 | T_SAE_Z, 0xCC); } +void vrsqrt28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_EW1|T_SAE_X|T_MUST_EVEX, 0xCD); } +void vrsqrt28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_SAE_X|T_MUST_EVEX, 0xCD); } +void vrsqrtbf16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP6|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x4E); } +void vrsqrtph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP6|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x4E); } +void vrsqrtsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_W0|T_MUST_EVEX, 0x4F); } +void vscalefbf16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP6|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x2C); } +void vscalefbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_MAP6|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x2C); } +void vscalefpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_EW1|T_YMM|T_ER_Z|T_MUST_EVEX|T_B64, 0x2C); } +void vscalefph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP6|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x2C); } +void vscalefps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_0F38|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B32, 0x2C); } +void vscalefsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8|T_66|T_0F38|T_EW1|T_ER_X|T_MUST_EVEX, 0x2D); } +void vscalefsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_66|T_MAP6|T_W0|T_ER_X|T_MUST_EVEX, 0x2D); } +void vscalefss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4|T_66|T_0F38|T_W0|T_ER_X|T_MUST_EVEX, 0x2D); } +void vscatterdpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_M_K|T_VSIB, 0xA2, 1); } +void vscatterdps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4|T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_M_K|T_VSIB, 0xA2, 0); } +void vscatterpf0dpd(const Address& addr) { opGatherFetch(addr, zm5, T_N8|T_66|T_0F38|T_EW1|T_MUST_EVEX|T_M_K|T_VSIB, 0xC6, Operand::YMM); } +void vscatterpf0dps(const Address& addr) { opGatherFetch(addr, zm5, T_N4|T_66|T_0F38|T_W0|T_MUST_EVEX|T_M_K|T_VSIB, 0xC6, Operand::ZMM); } +void vscatterpf0qpd(const Address& addr) { opGatherFetch(addr, zm5, T_N8|T_66|T_0F38|T_EW1|T_MUST_EVEX|T_M_K|T_VSIB, 0xC7, Operand::ZMM); } +void vscatterpf0qps(const Address& addr) { opGatherFetch(addr, zm5, T_N4|T_66|T_0F38|T_W0|T_MUST_EVEX|T_M_K|T_VSIB, 0xC7, Operand::ZMM); } +void vscatterpf1dpd(const Address& addr) { opGatherFetch(addr, zm6, T_N8|T_66|T_0F38|T_EW1|T_MUST_EVEX|T_M_K|T_VSIB, 0xC6, Operand::YMM); } +void vscatterpf1dps(const Address& addr) { opGatherFetch(addr, zm6, T_N4|T_66|T_0F38|T_W0|T_MUST_EVEX|T_M_K|T_VSIB, 0xC6, Operand::ZMM); } +void vscatterpf1qpd(const Address& addr) { opGatherFetch(addr, zm6, T_N8|T_66|T_0F38|T_EW1|T_MUST_EVEX|T_M_K|T_VSIB, 0xC7, Operand::ZMM); } +void vscatterpf1qps(const Address& addr) { opGatherFetch(addr, zm6, T_N4|T_66|T_0F38|T_W0|T_MUST_EVEX|T_M_K|T_VSIB, 0xC7, Operand::ZMM); } +void vscatterqpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8|T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX|T_M_K|T_VSIB, 0xA3, 0); } +void vscatterqps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4|T_66|T_0F38|T_W0|T_YMM|T_MUST_EVEX|T_M_K|T_VSIB, 0xA3, 2); } +void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_W0 | T_B32, 0x23, imm); } +void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); } +void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_W0 | T_B32, 0x43, imm); } +void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); } +void vsqrtbf16(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66|T_MAP5|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x51); } +void vsqrtph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5|T_W0|T_YMM|T_ER_Z|T_MUST_EVEX|T_B16, 0x51); } +void vsqrtsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2|T_F3|T_MAP5|T_W0|T_ER_X|T_MUST_EVEX, 0x51); } +void vsubbf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66|T_MAP5|T_W0|T_YMM|T_MUST_EVEX|T_B16, 0x5C); } +void vsubph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_W0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5C); } +void vsubsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_W0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5C); } +void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_MAP5|T_W0|T_SAE_X|T_MUST_EVEX, 0x2E); } +void vucomxsd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N8|T_F2|T_0F|T_EW1|T_SAE_X|T_MUST_EVEX, 0x2E); } +void vucomxsh(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N2|T_F3|T_MAP5|T_W0|T_SAE_X|T_MUST_EVEX, 0x2E); } +void vucomxss(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_N4|T_F3|T_0F|T_W0|T_SAE_X|T_MUST_EVEX, 0x2E); } +#ifdef XBYAK64 +void kmovq(const Reg64& r, const Opmask& k) { opKmov(k, r, true, 64); } +void tcvtrowd2ps(const Zmm& z, const Tmm& t, const Reg32& r) { opVex(z, &r, t, T_F3|T_0F38|T_W0|T_MUST_EVEX, 0x4A); } +void tcvtrowd2ps(const Zmm& z, const Tmm& t, uint8_t imm) { opVex(z, 0, t, T_F3|T_0F3A|T_W0|T_MUST_EVEX, 0x07, imm); } +void tcvtrowps2bf16h(const Zmm& z, const Tmm& t, const Reg32& r) { opVex(z, &r, t, T_F2|T_0F38|T_W0|T_MUST_EVEX, 0x6D); } +void tcvtrowps2bf16h(const Zmm& z, const Tmm& t, uint8_t imm) { opVex(z, 0, t, T_F2|T_0F3A|T_W0|T_MUST_EVEX, 0x07, imm); } +void tcvtrowps2bf16l(const Zmm& z, const Tmm& t, const Reg32& r) { opVex(z, &r, t, T_F3|T_0F38|T_W0|T_MUST_EVEX, 0x6D); } +void tcvtrowps2bf16l(const Zmm& z, const Tmm& t, uint8_t imm) { opVex(z, 0, t, T_F3|T_0F3A|T_W0|T_MUST_EVEX, 0x77, imm); } +void tcvtrowps2phh(const Zmm& z, const Tmm& t, const Reg32& r) { opVex(z, &r, t, T_0F38|T_W0|T_MUST_EVEX, 0x6D); } +void tcvtrowps2phh(const Zmm& z, const Tmm& t, uint8_t imm) { opVex(z, 0, t, T_0F3A|T_W0|T_MUST_EVEX, 0x07, imm); } +void tcvtrowps2phl(const Zmm& z, const Tmm& t, const Reg32& r) { opVex(z, &r, t, T_66|T_0F38|T_W0|T_MUST_EVEX, 0x6D); } +void tcvtrowps2phl(const Zmm& z, const Tmm& t, uint8_t imm) { opVex(z, 0, t, T_F2|T_0F3A|T_W0|T_MUST_EVEX, 0x77, imm); } +void tilemovrow(const Zmm& z, const Tmm& t, const Reg32& r) { opVex(z, &r, t, T_66|T_0F38|T_W0|T_MUST_EVEX, 0x4A); } +void tilemovrow(const Zmm& z, const Tmm& t, uint8_t imm) { opVex(z, 0, t, T_66|T_0F3A|T_W0|T_MUST_EVEX, 0x07, imm); } +void vmovrsb(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F2|T_MAP5|T_W0|T_MUST_EVEX, 0x6F); } +void vmovrsd(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F3|T_MAP5|T_W0|T_MUST_EVEX, 0x6F); } +void vmovrsq(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F3|T_MAP5|T_EW1|T_MUST_EVEX, 0x6F); } +void vmovrsw(const Xmm& x, const Address& addr) { opVex(x, 0, addr, T_F2|T_MAP5|T_EW1|T_MUST_EVEX, 0x6F); } +void vpbroadcastq(const Xmm& x, const Reg64& r) { opVex(x, 0, r, T_66|T_0F38|T_EW1|T_YMM|T_MUST_EVEX, 0x7C); } +#endif +#endif diff --git a/wx_key/vendor/xbyak/xbyak/xbyak_util.h b/wx_key/vendor/xbyak/xbyak/xbyak_util.h new file mode 100644 index 0000000..3cee6c1 --- /dev/null +++ b/wx_key/vendor/xbyak/xbyak/xbyak_util.h @@ -0,0 +1,1173 @@ +#ifndef XBYAK_XBYAK_UTIL_H_ +#define XBYAK_XBYAK_UTIL_H_ + +#ifdef XBYAK_ONLY_CLASS_CPU +#include +#include +#include +#ifndef XBYAK_THROW + #define XBYAK_THROW(x) ; + #define XBYAK_THROW_RET(x, y) return y; +#endif +#ifndef XBYAK_CONSTEXPR +#if ((__cplusplus >= 201402L) && !(!defined(__clang__) && defined(__GNUC__) && (__GNUC__ <= 5))) || (defined(_MSC_VER) && _MSC_VER >= 1910) + #define XBYAK_CONSTEXPR constexpr +#else + #define XBYAK_CONSTEXPR +#endif +#endif +#else +#include + +/** + utility class and functions for Xbyak + Xbyak::util::Clock ; rdtsc timer + Xbyak::util::Cpu ; detect CPU +*/ +#include "xbyak.h" +#endif // XBYAK_ONLY_CLASS_CPU + +#if defined(__i386__) || (defined(__x86_64__) && !defined(__arm64ec__)) || defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC)) + #define XBYAK_INTEL_CPU_SPECIFIC +#endif + +#ifdef XBYAK_INTEL_CPU_SPECIFIC +#ifdef _WIN32 + #if defined(_MSC_VER) && (_MSC_VER < 1400) && defined(XBYAK32) + static inline __declspec(naked) void __cpuid(int[4], int) + { + __asm { + push ebx + push esi + mov eax, dword ptr [esp + 4 * 2 + 8] // eaxIn + cpuid + mov esi, dword ptr [esp + 4 * 2 + 4] // data + mov dword ptr [esi], eax + mov dword ptr [esi + 4], ebx + mov dword ptr [esi + 8], ecx + mov dword ptr [esi + 12], edx + pop esi + pop ebx + ret + } + } + #else + #include // for __cpuid + #endif +#else + #ifndef __GNUC_PREREQ + #define __GNUC_PREREQ(major, minor) ((((__GNUC__) << 16) + (__GNUC_MINOR__)) >= (((major) << 16) + (minor))) + #endif + #if __GNUC_PREREQ(4, 3) && !defined(__APPLE__) + #include + #else + #if defined(__APPLE__) && defined(XBYAK32) // avoid err : can't find a register in class `BREG' while reloading `asm' + #define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx" : "=a"(a), "=S"(b), "=c"(c), "=d"(d) : "0"(eaxIn)) + #define __cpuid_count(eaxIn, ecxIn, a, b, c, d) __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx" : "=a"(a), "=S"(b), "=c"(c), "=d"(d) : "0"(eaxIn), "2"(ecxIn)) + #else + #define __cpuid(eaxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn)) + #define __cpuid_count(eaxIn, ecxIn, a, b, c, d) __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn), "2"(ecxIn)) + #endif + #endif +#endif +#endif + +#ifdef XBYAK_USE_VTUNE + // -I /opt/intel/vtune_amplifier/include/ -L /opt/intel/vtune_amplifier/lib64 -ljitprofiling -ldl + #include + #ifdef _MSC_VER + #pragma comment(lib, "libittnotify.lib") + #endif + #ifdef __linux__ + #include + #endif +#endif +#ifdef __linux__ + #define XBYAK_USE_PERF +#endif + +namespace Xbyak { namespace util { + +typedef enum { + SmtLevel = 1, + CoreLevel = 2 +} CpuTopologyLevel; +typedef CpuTopologyLevel IntelCpuTopologyLevel; // for backward compatibility + +namespace local { + +template +struct TypeT { +}; + +template +XBYAK_CONSTEXPR TypeT operator|(TypeT, TypeT) { return TypeT(); } + +template +inline T max_(T x, T y) { return x >= y ? x : y; } +template +inline T min_(T x, T y) { return x < y ? x : y; } + +} // local + +/** + CPU detection class + @note static inline const member is supported by c++17 or later, so use template hack +*/ +#ifdef _MSC_VER + #pragma warning(push) + #pragma warning(disable : 4459) +#endif +class Cpu { +public: + class Type { + uint64_t L; + uint64_t H; + public: + Type(uint64_t L = 0, uint64_t H = 0) : L(L), H(H) { } + template + Type(local::TypeT) : L(L_), H(H_) {} + Type& operator&=(const Type& rhs) { L &= rhs.L; H &= rhs.H; return *this; } + Type& operator|=(const Type& rhs) { L |= rhs.L; H |= rhs.H; return *this; } + Type operator&(const Type& rhs) const { Type t = *this; t &= rhs; return t; } + Type operator|(const Type& rhs) const { Type t = *this; t |= rhs; return t; } + bool operator==(const Type& rhs) const { return H == rhs.H && L == rhs.L; } + bool operator!=(const Type& rhs) const { return !operator==(rhs); } + // without explicit because backward compatilibity + operator bool() const { return (H | L) != 0; } + uint64_t getL() const { return L; } + uint64_t getH() const { return H; } + }; +private: + Type type_; + //system topology + static const size_t maxTopologyLevels = 2; + uint32_t numCores_[maxTopologyLevels]; + + static const uint32_t maxNumberCacheLevels = 10; + uint32_t dataCacheSize_[maxNumberCacheLevels]; + uint32_t coresSharingDataCache_[maxNumberCacheLevels]; + uint32_t dataCacheLevels_; + uint32_t avx10version_; + + uint32_t get32bitAsBE(const char *x) const + { + return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24); + } + uint32_t mask(int n) const + { + return (1U << n) - 1; + } + // [ebx:ecx:edx] == s? + bool isEqualStr(uint32_t ebx, uint32_t ecx, uint32_t edx, const char s[12]) const + { + return get32bitAsBE(&s[0]) == ebx && get32bitAsBE(&s[4]) == edx && get32bitAsBE(&s[8]) == ecx; + } + uint32_t extractBit(uint32_t val, uint32_t base, uint32_t end) const + { + return (val >> base) & ((1u << (end + 1 - base)) - 1); + } + void setFamily() + { + uint32_t data[4] = {}; + getCpuid(1, data); + stepping = extractBit(data[0], 0, 3); + model = extractBit(data[0], 4, 7); + family = extractBit(data[0], 8, 11); + //type = extractBit(data[0], 12, 13); + extModel = extractBit(data[0], 16, 19); + extFamily = extractBit(data[0], 20, 27); + if (family == 0x0f) { + displayFamily = family + extFamily; + } else { + displayFamily = family; + } + if ((has(tINTEL) && family == 6) || family == 0x0f) { + displayModel = (extModel << 4) + model; + } else { + displayModel = model; + } + } + void setNumCores() + { + if (!has(tINTEL) && !has(tAMD)) return; + + uint32_t data[4] = {}; + getCpuid(0x0, data); + if (data[0] >= 0xB) { + // Check if "Extended Topology Enumeration" is implemented. + getCpuidEx(0xB, 0, data); + if (data[0] != 0 || data[1] != 0) { + /* + if leaf 11 exists(x2APIC is supported), + we use it to get the number of smt cores and cores on socket + + leaf 0xB can be zeroed-out by a hypervisor + */ + for (uint32_t i = 0; i < maxTopologyLevels; i++) { + getCpuidEx(0xB, i, data); + CpuTopologyLevel level = (CpuTopologyLevel)extractBit(data[2], 8, 15); + if (level == SmtLevel || level == CoreLevel) { + numCores_[level - 1] = extractBit(data[1], 0, 15); + } + } + /* + Fallback values in case a hypervisor has the leaf zeroed-out. + */ + numCores_[SmtLevel - 1] = local::max_(1u, numCores_[SmtLevel - 1]); + numCores_[CoreLevel - 1] = local::max_(numCores_[SmtLevel - 1], numCores_[CoreLevel - 1]); + return; + } + } + // "Extended Topology Enumeration" is not supported. + if (has(tAMD)) { + /* + AMD - Legacy Method + */ + int physicalThreadCount = 0; + getCpuid(0x1, data); + int logicalProcessorCount = extractBit(data[1], 16, 23); + int htt = extractBit(data[3], 28, 28); // Hyper-threading technology. + getCpuid(0x80000000, data); + uint32_t highestExtendedLeaf = data[0]; + if (highestExtendedLeaf >= 0x80000008) { + getCpuid(0x80000008, data); + physicalThreadCount = extractBit(data[2], 0, 7) + 1; + } + if (htt == 0) { + numCores_[SmtLevel - 1] = 1; + numCores_[CoreLevel - 1] = 1; + } else if (physicalThreadCount > 1) { + if ((displayFamily >= 0x17) && (highestExtendedLeaf >= 0x8000001E)) { + // Zen overreports its core count by a factor of two. + getCpuid(0x8000001E, data); + int threadsPerComputeUnit = extractBit(data[1], 8, 15) + 1; + physicalThreadCount /= threadsPerComputeUnit; + } + numCores_[SmtLevel - 1] = logicalProcessorCount / physicalThreadCount; + numCores_[CoreLevel - 1] = logicalProcessorCount; + } else { + numCores_[SmtLevel - 1] = 1; + numCores_[CoreLevel - 1] = logicalProcessorCount > 1 ? logicalProcessorCount : 2; + } + } else { + /* + Intel - Legacy Method + */ + int physicalThreadCount = 0; + getCpuid(0x1, data); + int logicalProcessorCount = extractBit(data[1], 16, 23); + int htt = extractBit(data[3], 28, 28); // Hyper-threading technology. + getCpuid(0, data); + if (data[0] >= 0x4) { + getCpuid(0x4, data); + physicalThreadCount = extractBit(data[0], 26, 31) + 1; + } + if (htt == 0) { + numCores_[SmtLevel - 1] = 1; + numCores_[CoreLevel - 1] = 1; + } else if (physicalThreadCount > 1) { + numCores_[SmtLevel - 1] = logicalProcessorCount / physicalThreadCount; + numCores_[CoreLevel - 1] = logicalProcessorCount; + } else { + numCores_[SmtLevel - 1] = 1; + numCores_[CoreLevel - 1] = logicalProcessorCount > 0 ? logicalProcessorCount : 1; + } + } + } + void setCacheHierarchy() + { + uint32_t data[4] = {}; + if (has(tAMD)) { + getCpuid(0x80000000, data); + if (data[0] >= 0x8000001D) { + // For modern AMD CPUs. + dataCacheLevels_ = 0; + for (uint32_t subLeaf = 0; dataCacheLevels_ < maxNumberCacheLevels; subLeaf++) { + getCpuidEx(0x8000001D, subLeaf, data); + int cacheType = extractBit(data[0], 0, 4); + /* + cacheType + 00h - Null; no more caches + 01h - Data cache + 02h - Instrution cache + 03h - Unified cache + 04h-1Fh - Reserved + */ + if (cacheType == 0) break; // No more caches. + if (cacheType == 0x2) continue; // Skip instruction cache. + int fullyAssociative = extractBit(data[0], 9, 9); + int numSharingCache = extractBit(data[0], 14, 25) + 1; + int cacheNumWays = extractBit(data[1], 22, 31) + 1; + int cachePhysPartitions = extractBit(data[1], 12, 21) + 1; + int cacheLineSize = extractBit(data[1], 0, 11) + 1; + int cacheNumSets = data[2] + 1; + dataCacheSize_[dataCacheLevels_] = + cacheLineSize * cachePhysPartitions * cacheNumWays; + if (fullyAssociative == 0) { + dataCacheSize_[dataCacheLevels_] *= cacheNumSets; + } + if (subLeaf > 0) { + numSharingCache = local::min_(numSharingCache, (int)numCores_[1]); + numSharingCache /= local::max_(1u, coresSharingDataCache_[0]); + } + coresSharingDataCache_[dataCacheLevels_] = numSharingCache; + dataCacheLevels_ += 1; + } + coresSharingDataCache_[0] = local::min_(1u, coresSharingDataCache_[0]); + } else if (data[0] >= 0x80000006) { + // For legacy AMD CPUs, use leaf 0x80000005 for L1 cache + // and 0x80000006 for L2 and L3 cache. + dataCacheLevels_ = 1; + getCpuid(0x80000005, data); + int l1dc_size = extractBit(data[2], 24, 31); + dataCacheSize_[0] = l1dc_size * 1024; + coresSharingDataCache_[0] = 1; + getCpuid(0x80000006, data); + // L2 cache + int l2_assoc = extractBit(data[2], 12, 15); + if (l2_assoc > 0) { + dataCacheLevels_ = 2; + int l2_size = extractBit(data[2], 16, 31); + dataCacheSize_[1] = l2_size * 1024; + coresSharingDataCache_[1] = 1; + } + // L3 cache + int l3_assoc = extractBit(data[3], 12, 15); + if (l3_assoc > 0) { + dataCacheLevels_ = 3; + int l3_size = extractBit(data[3], 18, 31); + dataCacheSize_[2] = l3_size * 512 * 1024; + coresSharingDataCache_[2] = numCores_[1]; + } + } + } else if (has(tINTEL)) { + // Use the "Deterministic Cache Parameters" leaf is supported. + const uint32_t NO_CACHE = 0; + const uint32_t DATA_CACHE = 1; + //const uint32_t INSTRUCTION_CACHE = 2; + const uint32_t UNIFIED_CACHE = 3; + uint32_t smt_width = 0; + uint32_t logical_cores = 0; + + smt_width = numCores_[0]; + logical_cores = numCores_[1]; + + /* + Assumptions: + the first level of data cache is not shared (which is the + case for every existing architecture) and use this to + determine the SMT width for arch not supporting leaf 11. + when leaf 4 reports a number of core less than numCores_ + on socket reported by leaf 11, then it is a correct number + of cores not an upperbound. + */ + for (int i = 0; dataCacheLevels_ < maxNumberCacheLevels; i++) { + getCpuidEx(0x4, i, data); + uint32_t cacheType = extractBit(data[0], 0, 4); + if (cacheType == NO_CACHE) break; + if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) { + uint32_t actual_logical_cores = extractBit(data[0], 14, 25) + 1; + if (logical_cores != 0) { // true only if leaf 0xB is supported and valid + actual_logical_cores = local::min_(actual_logical_cores, logical_cores); + } + assert(actual_logical_cores != 0); + dataCacheSize_[dataCacheLevels_] = + (extractBit(data[1], 22, 31) + 1) + * (extractBit(data[1], 12, 21) + 1) + * (extractBit(data[1], 0, 11) + 1) + * (data[2] + 1); + if (cacheType == DATA_CACHE && smt_width == 0) smt_width = actual_logical_cores; + assert(smt_width != 0); + coresSharingDataCache_[dataCacheLevels_] = local::max_(actual_logical_cores / smt_width, 1u); + dataCacheLevels_++; + } + } + } + } + +public: + int model; + int family; + int stepping; + int extModel; + int extFamily; + int displayFamily; // family + extFamily + int displayModel; // model + extModel + + uint32_t getNumCores(CpuTopologyLevel level) const { + switch (level) { + case SmtLevel: return numCores_[level - 1]; + case CoreLevel: return numCores_[level - 1] / numCores_[SmtLevel - 1]; + default: XBYAK_THROW_RET(ERR_X2APIC_IS_NOT_SUPPORTED, 0) + } + } + + uint32_t getDataCacheLevels() const { return dataCacheLevels_; } + uint32_t getCoresSharingDataCache(uint32_t i) const + { + if (i >= dataCacheLevels_) XBYAK_THROW_RET(ERR_BAD_PARAMETER, 0) + return coresSharingDataCache_[i]; + } + uint32_t getDataCacheSize(uint32_t i) const + { + if (i >= dataCacheLevels_) XBYAK_THROW_RET(ERR_BAD_PARAMETER, 0) + return dataCacheSize_[i]; + } + + /* + data[] = { eax, ebx, ecx, edx } + */ + static inline void getCpuidEx(uint32_t eaxIn, uint32_t ecxIn, uint32_t data[4]) + { +#ifdef XBYAK_INTEL_CPU_SPECIFIC + #ifdef _WIN32 + __cpuidex(reinterpret_cast(data), eaxIn, ecxIn); + #else + __cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]); + #endif +#else + (void)eaxIn; + (void)ecxIn; + (void)data; +#endif + } + static inline void getCpuid(uint32_t eaxIn, uint32_t data[4]) + { + getCpuidEx(eaxIn, 0, data); + } + static inline uint64_t getXfeature() + { +#ifdef XBYAK_INTEL_CPU_SPECIFIC + #ifdef _MSC_VER + return _xgetbv(0); + #else + uint32_t eax, edx; + // xgetvb is not support on gcc 4.2 +// __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0)); + __asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0)); + return ((uint64_t)edx << 32) | eax; + #endif +#else + return 0; +#endif + } + +#define XBYAK_SPLIT_ID(id) ((0 <= id && id < 64) ? (1ull << (id % 64)) : 0), (id >= 64 ? (1ull << (id % 64)) : 0) +#if (__cplusplus >= 201103) || (defined(_MSC_VER) && (_MSC_VER >= 1700)) /* VS2012 */ + #define XBYAK_DEFINE_TYPE(id, NAME) static const constexpr local::TypeT NAME{} +#else + #define XBYAK_DEFINE_TYPE(id, NAME) static const local::TypeT NAME +#endif + XBYAK_DEFINE_TYPE(0, tMMX); + XBYAK_DEFINE_TYPE(1, tMMX2); + XBYAK_DEFINE_TYPE(2, tCMOV); + XBYAK_DEFINE_TYPE(3, tSSE); + XBYAK_DEFINE_TYPE(4, tSSE2); + XBYAK_DEFINE_TYPE(5, tSSE3); + XBYAK_DEFINE_TYPE(6, tSSSE3); + XBYAK_DEFINE_TYPE(7, tSSE41); + XBYAK_DEFINE_TYPE(8, tSSE42); + XBYAK_DEFINE_TYPE(9, tPOPCNT); + XBYAK_DEFINE_TYPE(10, tAESNI); + XBYAK_DEFINE_TYPE(11, tAVX512_FP16); + XBYAK_DEFINE_TYPE(12, tOSXSAVE); + XBYAK_DEFINE_TYPE(13, tPCLMULQDQ); + XBYAK_DEFINE_TYPE(14, tAVX); + XBYAK_DEFINE_TYPE(15, tFMA); + XBYAK_DEFINE_TYPE(16, t3DN); + XBYAK_DEFINE_TYPE(17, tE3DN); + XBYAK_DEFINE_TYPE(18, tWAITPKG); + XBYAK_DEFINE_TYPE(19, tRDTSCP); + XBYAK_DEFINE_TYPE(20, tAVX2); + XBYAK_DEFINE_TYPE(21, tBMI1); // andn, bextr, blsi, blsmsk, blsr, tzcnt + XBYAK_DEFINE_TYPE(22, tBMI2); // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx + XBYAK_DEFINE_TYPE(23, tLZCNT); + XBYAK_DEFINE_TYPE(24, tINTEL); + XBYAK_DEFINE_TYPE(25, tAMD); + XBYAK_DEFINE_TYPE(26, tENHANCED_REP); // enhanced rep movsb/stosb + XBYAK_DEFINE_TYPE(27, tRDRAND); + XBYAK_DEFINE_TYPE(28, tADX); // adcx, adox + XBYAK_DEFINE_TYPE(29, tRDSEED); // rdseed + XBYAK_DEFINE_TYPE(30, tSMAP); // stac + XBYAK_DEFINE_TYPE(31, tHLE); // xacquire, xrelease, xtest + XBYAK_DEFINE_TYPE(32, tRTM); // xbegin, xend, xabort + XBYAK_DEFINE_TYPE(33, tF16C); // vcvtph2ps, vcvtps2ph + XBYAK_DEFINE_TYPE(34, tMOVBE); // mobve + XBYAK_DEFINE_TYPE(35, tAVX512F); + XBYAK_DEFINE_TYPE(36, tAVX512DQ); + XBYAK_DEFINE_TYPE(37, tAVX512_IFMA); + XBYAK_DEFINE_TYPE(37, tAVX512IFMA);// = tAVX512_IFMA; + XBYAK_DEFINE_TYPE(38, tAVX512PF); + XBYAK_DEFINE_TYPE(39, tAVX512ER); + XBYAK_DEFINE_TYPE(40, tAVX512CD); + XBYAK_DEFINE_TYPE(41, tAVX512BW); + XBYAK_DEFINE_TYPE(42, tAVX512VL); + XBYAK_DEFINE_TYPE(43, tAVX512_VBMI); + XBYAK_DEFINE_TYPE(43, tAVX512VBMI); // = tAVX512_VBMI; // changed by Intel's manual + XBYAK_DEFINE_TYPE(44, tAVX512_4VNNIW); + XBYAK_DEFINE_TYPE(45, tAVX512_4FMAPS); + XBYAK_DEFINE_TYPE(46, tPREFETCHWT1); + XBYAK_DEFINE_TYPE(47, tPREFETCHW); + XBYAK_DEFINE_TYPE(48, tSHA); + XBYAK_DEFINE_TYPE(49, tMPX); + XBYAK_DEFINE_TYPE(50, tAVX512_VBMI2); + XBYAK_DEFINE_TYPE(51, tGFNI); + XBYAK_DEFINE_TYPE(52, tVAES); + XBYAK_DEFINE_TYPE(53, tVPCLMULQDQ); + XBYAK_DEFINE_TYPE(54, tAVX512_VNNI); + XBYAK_DEFINE_TYPE(55, tAVX512_BITALG); + XBYAK_DEFINE_TYPE(56, tAVX512_VPOPCNTDQ); + XBYAK_DEFINE_TYPE(57, tAVX512_BF16); + XBYAK_DEFINE_TYPE(58, tAVX512_VP2INTERSECT); + XBYAK_DEFINE_TYPE(59, tAMX_TILE); + XBYAK_DEFINE_TYPE(60, tAMX_INT8); + XBYAK_DEFINE_TYPE(61, tAMX_BF16); + XBYAK_DEFINE_TYPE(62, tAVX_VNNI); + XBYAK_DEFINE_TYPE(63, tCLFLUSHOPT); + XBYAK_DEFINE_TYPE(64, tCLDEMOTE); + XBYAK_DEFINE_TYPE(65, tMOVDIRI); + XBYAK_DEFINE_TYPE(66, tMOVDIR64B); + XBYAK_DEFINE_TYPE(67, tCLZERO); // AMD Zen + XBYAK_DEFINE_TYPE(68, tAMX_FP16); + XBYAK_DEFINE_TYPE(69, tAVX_VNNI_INT8); + XBYAK_DEFINE_TYPE(70, tAVX_NE_CONVERT); + XBYAK_DEFINE_TYPE(71, tAVX_IFMA); + XBYAK_DEFINE_TYPE(72, tRAO_INT); + XBYAK_DEFINE_TYPE(73, tCMPCCXADD); + XBYAK_DEFINE_TYPE(74, tPREFETCHITI); + XBYAK_DEFINE_TYPE(75, tSERIALIZE); + XBYAK_DEFINE_TYPE(76, tUINTR); + XBYAK_DEFINE_TYPE(77, tXSAVE); + XBYAK_DEFINE_TYPE(78, tSHA512); + XBYAK_DEFINE_TYPE(79, tSM3); + XBYAK_DEFINE_TYPE(80, tSM4); + XBYAK_DEFINE_TYPE(81, tAVX_VNNI_INT16); + XBYAK_DEFINE_TYPE(82, tAPX_F); + XBYAK_DEFINE_TYPE(83, tAVX10); + XBYAK_DEFINE_TYPE(84, tAESKLE); + XBYAK_DEFINE_TYPE(85, tWIDE_KL); + XBYAK_DEFINE_TYPE(86, tKEYLOCKER); + XBYAK_DEFINE_TYPE(87, tKEYLOCKER_WIDE); + XBYAK_DEFINE_TYPE(88, tSSE4a); + XBYAK_DEFINE_TYPE(89, tCLWB); + XBYAK_DEFINE_TYPE(90, tTSXLDTRK); + XBYAK_DEFINE_TYPE(91, tAMX_TRANSPOSE); + XBYAK_DEFINE_TYPE(92, tAMX_TF32); + XBYAK_DEFINE_TYPE(93, tAMX_AVX512); + XBYAK_DEFINE_TYPE(94, tAMX_MOVRS); + XBYAK_DEFINE_TYPE(95, tAMX_FP8); + XBYAK_DEFINE_TYPE(96, tMOVRS); + +#undef XBYAK_SPLIT_ID +#undef XBYAK_DEFINE_TYPE + + Cpu() + : type_() + , numCores_() + , dataCacheSize_() + , coresSharingDataCache_() + , dataCacheLevels_(0) + , avx10version_(0) + { + uint32_t data[4] = {}; + const uint32_t& eax = data[0]; + const uint32_t& ebx = data[1]; + const uint32_t& ecx = data[2]; + const uint32_t& edx = data[3]; + getCpuid(0, data); + const uint32_t maxNum = eax; + if (isEqualStr(ebx, ecx, edx, "AuthenticAMD")) { + type_ |= tAMD; + getCpuid(0x80000001, data); + if (edx & (1U << 31)) { + type_ |= t3DN; + // 3DNow! implies support for PREFETCHW on AMD + type_ |= tPREFETCHW; + } + + if (edx & (1U << 29)) { + // Long mode implies support for PREFETCHW on AMD + type_ |= tPREFETCHW; + } + } else if (isEqualStr(ebx, ecx, edx, "GenuineIntel")) { + type_ |= tINTEL; + } + + // Extended flags information + getCpuid(0x80000000, data); + const uint32_t maxExtendedNum = eax; + if (maxExtendedNum >= 0x80000001) { + getCpuid(0x80000001, data); + + if (ecx & (1U << 5)) type_ |= tLZCNT; + if (ecx & (1U << 6)) type_ |= tSSE4a; + if (ecx & (1U << 8)) type_ |= tPREFETCHW; + if (edx & (1U << 15)) type_ |= tCMOV; + if (edx & (1U << 22)) type_ |= tMMX2; + if (edx & (1U << 27)) type_ |= tRDTSCP; + if (edx & (1U << 30)) type_ |= tE3DN; + if (edx & (1U << 31)) type_ |= t3DN; + } + + if (maxExtendedNum >= 0x80000008) { + getCpuid(0x80000008, data); + if (ebx & (1U << 0)) type_ |= tCLZERO; + } + + getCpuid(1, data); + if (ecx & (1U << 0)) type_ |= tSSE3; + if (ecx & (1U << 1)) type_ |= tPCLMULQDQ; + if (ecx & (1U << 9)) type_ |= tSSSE3; + if (ecx & (1U << 19)) type_ |= tSSE41; + if (ecx & (1U << 20)) type_ |= tSSE42; + if (ecx & (1U << 22)) type_ |= tMOVBE; + if (ecx & (1U << 23)) type_ |= tPOPCNT; + if (ecx & (1U << 25)) type_ |= tAESNI; + if (ecx & (1U << 26)) type_ |= tXSAVE; + if (ecx & (1U << 27)) type_ |= tOSXSAVE; + if (ecx & (1U << 29)) type_ |= tF16C; + if (ecx & (1U << 30)) type_ |= tRDRAND; + + if (edx & (1U << 15)) type_ |= tCMOV; + if (edx & (1U << 23)) type_ |= tMMX; + if (edx & (1U << 25)) type_ |= tMMX2 | tSSE; + if (edx & (1U << 26)) type_ |= tSSE2; + + if (type_ & tOSXSAVE) { + // check XFEATURE_ENABLED_MASK[2:1] = '11b' + uint64_t bv = getXfeature(); + if ((bv & 6) == 6) { + if (ecx & (1U << 12)) type_ |= tFMA; + if (ecx & (1U << 28)) type_ |= tAVX; + // do *not* check AVX-512 state on macOS because it has on-demand AVX-512 support +#if !defined(__APPLE__) + if (((bv >> 5) & 7) == 7) +#endif + { + getCpuidEx(7, 0, data); + if (ebx & (1U << 16)) type_ |= tAVX512F; + if (type_ & tAVX512F) { + if (ebx & (1U << 17)) type_ |= tAVX512DQ; + if (ebx & (1U << 21)) type_ |= tAVX512_IFMA; + if (ebx & (1U << 26)) type_ |= tAVX512PF; + if (ebx & (1U << 27)) type_ |= tAVX512ER; + if (ebx & (1U << 28)) type_ |= tAVX512CD; + if (ebx & (1U << 30)) type_ |= tAVX512BW; + if (ebx & (1U << 31)) type_ |= tAVX512VL; + if (ecx & (1U << 1)) type_ |= tAVX512_VBMI; + if (ecx & (1U << 6)) type_ |= tAVX512_VBMI2; + if (ecx & (1U << 11)) type_ |= tAVX512_VNNI; + if (ecx & (1U << 12)) type_ |= tAVX512_BITALG; + if (ecx & (1U << 14)) type_ |= tAVX512_VPOPCNTDQ; + if (edx & (1U << 2)) type_ |= tAVX512_4VNNIW; + if (edx & (1U << 3)) type_ |= tAVX512_4FMAPS; + if (edx & (1U << 8)) type_ |= tAVX512_VP2INTERSECT; + if ((type_ & tAVX512BW) && (edx & (1U << 23))) type_ |= tAVX512_FP16; + } + } + } + } + if (maxNum >= 7) { + getCpuidEx(7, 0, data); + const uint32_t maxNumSubLeaves = eax; + if (type_ & tAVX && (ebx & (1U << 5))) type_ |= tAVX2; + if (ebx & (1U << 3)) type_ |= tBMI1; + if (ebx & (1U << 4)) type_ |= tHLE; + if (ebx & (1U << 8)) type_ |= tBMI2; + if (ebx & (1U << 9)) type_ |= tENHANCED_REP; + if (ebx & (1U << 11)) type_ |= tRTM; + if (ebx & (1U << 14)) type_ |= tMPX; + if (ebx & (1U << 18)) type_ |= tRDSEED; + if (ebx & (1U << 19)) type_ |= tADX; + if (ebx & (1U << 20)) type_ |= tSMAP; + if (ebx & (1U << 23)) type_ |= tCLFLUSHOPT; + if (ebx & (1U << 24)) type_ |= tCLWB; + if (ebx & (1U << 29)) type_ |= tSHA; + if (ecx & (1U << 0)) type_ |= tPREFETCHWT1; + if (ecx & (1U << 5)) type_ |= tWAITPKG; + if (ecx & (1U << 8)) type_ |= tGFNI; + if (ecx & (1U << 9)) type_ |= tVAES; + if (ecx & (1U << 10)) type_ |= tVPCLMULQDQ; + if (ecx & (1U << 23)) type_ |= tKEYLOCKER; + if (ecx & (1U << 25)) type_ |= tCLDEMOTE; + if (ecx & (1U << 27)) type_ |= tMOVDIRI; + if (ecx & (1U << 28)) type_ |= tMOVDIR64B; + if (edx & (1U << 5)) type_ |= tUINTR; + if (edx & (1U << 14)) type_ |= tSERIALIZE; + if (edx & (1U << 16)) type_ |= tTSXLDTRK; + if (edx & (1U << 22)) type_ |= tAMX_BF16; + if (edx & (1U << 24)) type_ |= tAMX_TILE; + if (edx & (1U << 25)) type_ |= tAMX_INT8; + if (maxNumSubLeaves >= 1) { + getCpuidEx(7, 1, data); + if (eax & (1U << 0)) type_ |= tSHA512; + if (eax & (1U << 1)) type_ |= tSM3; + if (eax & (1U << 2)) type_ |= tSM4; + if (eax & (1U << 3)) type_ |= tRAO_INT; + if (eax & (1U << 4)) type_ |= tAVX_VNNI; + if (type_ & tAVX512F) { + if (eax & (1U << 5)) type_ |= tAVX512_BF16; + } + if (eax & (1U << 7)) type_ |= tCMPCCXADD; + if (eax & (1U << 21)) type_ |= tAMX_FP16; + if (eax & (1U << 23)) type_ |= tAVX_IFMA; + if (eax & (1U << 31)) type_ |= tMOVRS; + if (edx & (1U << 4)) type_ |= tAVX_VNNI_INT8; + if (edx & (1U << 5)) type_ |= tAVX_NE_CONVERT; + if (edx & (1U << 10)) type_ |= tAVX_VNNI_INT16; + if (edx & (1U << 14)) type_ |= tPREFETCHITI; + if (edx & (1U << 19)) type_ |= tAVX10; + if (edx & (1U << 21)) type_ |= tAPX_F; + + getCpuidEx(0x1e, 1, data); + if (eax & (1U << 4)) type_ |= tAMX_FP8; + if (eax & (1U << 5)) type_ |= tAMX_TRANSPOSE; + if (eax & (1U << 6)) type_ |= tAMX_TF32; + if (eax & (1U << 7)) type_ |= tAMX_AVX512; + if (eax & (1U << 8)) type_ |= tAMX_MOVRS; + } + } + if (maxNum >= 0x19) { + getCpuidEx(0x19, 0, data); + if (ebx & (1U << 0)) type_ |= tAESKLE; + if (ebx & (1U << 2)) type_ |= tWIDE_KL; + if (type_ & (tKEYLOCKER|tAESKLE|tWIDE_KL)) type_ |= tKEYLOCKER_WIDE; + } + if (has(tAVX10) && maxNum >= 0x24) { + getCpuidEx(0x24, 0, data); + avx10version_ = ebx & mask(7); + } + setFamily(); + setNumCores(); + setCacheHierarchy(); + } + void putFamily() const + { +#ifndef XBYAK_ONLY_CLASS_CPU + printf("family=%d, model=%X, stepping=%d, extFamily=%d, extModel=%X\n", + family, model, stepping, extFamily, extModel); + printf("display:family=%X, model=%X\n", displayFamily, displayModel); +#endif + } + bool has(const Type& type) const + { + return (type & type_) == type; + } + int getAVX10version() const { return avx10version_; } +}; +#ifdef _MSC_VER + #pragma warning(pop) +#endif + +#ifndef XBYAK_ONLY_CLASS_CPU +class Clock { +public: + static inline uint64_t getRdtsc() + { +#ifdef XBYAK_INTEL_CPU_SPECIFIC + #ifdef _MSC_VER + return __rdtsc(); + #else + uint32_t eax, edx; + __asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx)); + return ((uint64_t)edx << 32) | eax; + #endif +#else + // TODO: Need another impl of Clock or rdtsc-equivalent for non-x86 cpu + return 0; +#endif + } + Clock() + : clock_(0) + , count_(0) + { + } + void begin() + { + clock_ -= getRdtsc(); + } + void end() + { + clock_ += getRdtsc(); + count_++; + } + int getCount() const { return count_; } + uint64_t getClock() const { return clock_; } + void clear() { count_ = 0; clock_ = 0; } +private: + uint64_t clock_; + int count_; +}; + +#ifdef XBYAK64 +const int UseRCX = 1 << 6; +const int UseRDX = 1 << 7; + +class Pack { + static const size_t maxTblNum = 15; + Xbyak::Reg64 tbl_[maxTblNum]; + size_t n_; +public: + Pack() : tbl_(), n_(0) {} + Pack(const Xbyak::Reg64 *tbl, size_t n) { init(tbl, n); } + Pack(const Pack& rhs) + : n_(rhs.n_) + { + for (size_t i = 0; i < n_; i++) tbl_[i] = rhs.tbl_[i]; + } + Pack& operator=(const Pack& rhs) + { + n_ = rhs.n_; + for (size_t i = 0; i < n_; i++) tbl_[i] = rhs.tbl_[i]; + return *this; + } + Pack(const Xbyak::Reg64& t0) + { n_ = 1; tbl_[0] = t0; } + Pack(const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 2; tbl_[0] = t0; tbl_[1] = t1; } + Pack(const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 3; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; } + Pack(const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 4; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; } + Pack(const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 5; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; } + Pack(const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 6; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; } + Pack(const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 7; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; } + Pack(const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 8; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; tbl_[7] = t7; } + Pack(const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 9; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; tbl_[7] = t7; tbl_[8] = t8; } + Pack(const Xbyak::Reg64& t9, const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 10; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; tbl_[7] = t7; tbl_[8] = t8; tbl_[9] = t9; } + Pack(const Xbyak::Reg64& ta, const Xbyak::Reg64& t9, const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 11; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; tbl_[7] = t7; tbl_[8] = t8; tbl_[9] = t9; tbl_[10] = ta; } + Pack(const Xbyak::Reg64& tb, const Xbyak::Reg64& ta, const Xbyak::Reg64& t9, const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0) + { n_ = 12; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; tbl_[7] = t7; tbl_[8] = t8; tbl_[9] = t9; tbl_[10] = ta; tbl_[11] = tb; } + Pack& append(const Xbyak::Reg64& t) + { + if (n_ == maxTblNum) { + fprintf(stderr, "ERR Pack::can't append\n"); + XBYAK_THROW_RET(ERR_BAD_PARAMETER, *this) + } + tbl_[n_++] = t; + return *this; + } + void init(const Xbyak::Reg64 *tbl, size_t n) + { + if (n > maxTblNum) { + fprintf(stderr, "ERR Pack::init bad n=%d\n", (int)n); + XBYAK_THROW(ERR_BAD_PARAMETER) + } + n_ = n; + for (size_t i = 0; i < n; i++) { + tbl_[i] = tbl[i]; + } + } + const Xbyak::Reg64& operator[](size_t n) const + { + if (n >= n_) { + fprintf(stderr, "ERR Pack bad n=%d(%d)\n", (int)n, (int)n_); + XBYAK_THROW_RET(ERR_BAD_PARAMETER, rax) + } + return tbl_[n]; + } + size_t size() const { return n_; } + /* + get tbl[pos, pos + num) + */ + Pack sub(size_t pos, size_t num = size_t(-1)) const + { + if (num == size_t(-1)) num = n_ - pos; + if (pos + num > n_) { + fprintf(stderr, "ERR Pack::sub bad pos=%d, num=%d\n", (int)pos, (int)num); + XBYAK_THROW_RET(ERR_BAD_PARAMETER, Pack()) + } + Pack pack; + pack.n_ = num; + for (size_t i = 0; i < num; i++) { + pack.tbl_[i] = tbl_[pos + i]; + } + return pack; + } + void put() const + { + for (size_t i = 0; i < n_; i++) { + printf("%s ", tbl_[i].toString()); + } + printf("\n"); + } +}; + +class StackFrame { +#ifdef XBYAK64_WIN + static const int noSaveNum = 6; + static const int rcxPos = 0; + static const int rdxPos = 1; +#else + static const int noSaveNum = 8; + static const int rcxPos = 3; + static const int rdxPos = 2; +#endif + static const int maxRegNum = 14; // maxRegNum = 16 - rsp - rax + Xbyak::CodeGenerator *code_; + Xbyak::Reg64 pTbl_[4]; + Xbyak::Reg64 tTbl_[maxRegNum]; + Pack p_; + Pack t_; + int pNum_; + int tNum_; + int saveNum_; + int P_; + bool useRcx_; + bool useRdx_; + bool makeEpilog_; + StackFrame(const StackFrame&); + void operator=(const StackFrame&); +public: + const Pack& p; + const Pack& t; + /* + make stack frame + @param sf [in] this + @param pNum [in] num of function parameter(0 <= pNum <= 4) + @param tNum [in] num of temporary register(0 <= tNum, with UseRCX, UseRDX) #{pNum + tNum [+rcx] + [rdx]} <= 14 + @param stackSizeByte [in] local stack size + @param makeEpilog [in] automatically call close() if true + + you can use + rax + gp0, ..., gp(pNum - 1) + gt0, ..., gt(tNum-1) + rcx if tNum & UseRCX + rdx if tNum & UseRDX + rsp[0..stackSizeByte - 1] + */ + StackFrame(Xbyak::CodeGenerator *code, int pNum, int tNum = 0, int stackSizeByte = 0, bool makeEpilog = true) + : code_(code) + , pNum_(pNum) + , tNum_(tNum & ~(UseRCX | UseRDX)) + , saveNum_(0) + , P_(0) + , useRcx_((tNum & UseRCX) != 0) + , useRdx_((tNum & UseRDX) != 0) + , makeEpilog_(makeEpilog) + , p(p_) + , t(t_) + { + using namespace Xbyak; + if (pNum < 0 || pNum > 4) XBYAK_THROW(ERR_BAD_PNUM) + const int allRegNum = pNum + tNum_ + (useRcx_ ? 1 : 0) + (useRdx_ ? 1 : 0); + if (tNum_ < 0 || allRegNum > maxRegNum) XBYAK_THROW(ERR_BAD_TNUM) + const Reg64& _rsp = code->rsp; + saveNum_ = local::max_(0, allRegNum - noSaveNum); + const int *tbl = getOrderTbl() + noSaveNum; + for (int i = 0; i < saveNum_; i++) { + code->push(Reg64(tbl[i])); + } + P_ = (stackSizeByte + 7) / 8; + if (P_ > 0 && (P_ & 1) == (saveNum_ & 1)) P_++; // (rsp % 16) == 8, then increment P_ for 16 byte alignment + P_ *= 8; + if (P_ > 0) code->sub(_rsp, P_); + int pos = 0; + for (int i = 0; i < pNum; i++) { + pTbl_[i] = Xbyak::Reg64(getRegIdx(pos)); + } + for (int i = 0; i < tNum_; i++) { + tTbl_[i] = Xbyak::Reg64(getRegIdx(pos)); + } + if (useRcx_ && rcxPos < pNum) code_->mov(code_->r10, code_->rcx); + if (useRdx_ && rdxPos < pNum) code_->mov(code_->r11, code_->rdx); + p_.init(pTbl_, pNum); + t_.init(tTbl_, tNum_); + } + /* + make epilog manually + @param callRet [in] call ret() if true + */ + void close(bool callRet = true) + { + using namespace Xbyak; + const Reg64& _rsp = code_->rsp; + const int *tbl = getOrderTbl() + noSaveNum; + if (P_ > 0) code_->add(_rsp, P_); + for (int i = 0; i < saveNum_; i++) { + code_->pop(Reg64(tbl[saveNum_ - 1 - i])); + } + + if (callRet) code_->ret(); + } + ~StackFrame() + { + if (!makeEpilog_) return; + close(); + } +private: + const int *getOrderTbl() const + { + using namespace Xbyak; + static const int tbl[] = { +#ifdef XBYAK64_WIN + Operand::RCX, Operand::RDX, Operand::R8, Operand::R9, Operand::R10, Operand::R11, Operand::RDI, Operand::RSI, +#else + Operand::RDI, Operand::RSI, Operand::RDX, Operand::RCX, Operand::R8, Operand::R9, Operand::R10, Operand::R11, +#endif + Operand::RBX, Operand::RBP, Operand::R12, Operand::R13, Operand::R14, Operand::R15 + }; + return &tbl[0]; + } + int getRegIdx(int& pos) const + { + assert(pos < maxRegNum); + using namespace Xbyak; + const int *tbl = getOrderTbl(); + int r = tbl[pos++]; + if (useRcx_) { + if (r == Operand::RCX) { return Operand::R10; } + if (r == Operand::R10) { r = tbl[pos++]; } + } + if (useRdx_) { + if (r == Operand::RDX) { return Operand::R11; } + if (r == Operand::R11) { return tbl[pos++]; } + } + return r; + } +}; +#endif + +class Profiler { + int mode_; + const char *suffix_; + const void *startAddr_; +#ifdef XBYAK_USE_PERF + FILE *fp_; +#endif +public: + enum { + None = 0, + Perf = 1, + VTune = 2 + }; + Profiler() + : mode_(None) + , suffix_("") + , startAddr_(0) +#ifdef XBYAK_USE_PERF + , fp_(0) +#endif + { + } + // append suffix to funcName + void setNameSuffix(const char *suffix) + { + suffix_ = suffix; + } + void setStartAddr(const void *startAddr) + { + startAddr_ = startAddr; + } + void init(int mode) + { + mode_ = None; + switch (mode) { + default: + case None: + return; + case Perf: +#ifdef XBYAK_USE_PERF + close(); + { + const int pid = getpid(); + char name[128]; + snprintf(name, sizeof(name), "/tmp/perf-%d.map", pid); + fp_ = fopen(name, "a+"); + if (fp_ == 0) { + fprintf(stderr, "can't open %s\n", name); + return; + } + } + mode_ = Perf; +#endif + return; + case VTune: +#ifdef XBYAK_USE_VTUNE + dlopen("dummy", RTLD_LAZY); // force to load dlopen to enable jit profiling + if (iJIT_IsProfilingActive() != iJIT_SAMPLING_ON) { + fprintf(stderr, "VTune profiling is not active\n"); + return; + } + mode_ = VTune; +#endif + return; + } + } + ~Profiler() + { + close(); + } + void close() + { +#ifdef XBYAK_USE_PERF + if (fp_ == 0) return; + fclose(fp_); + fp_ = 0; +#endif + } + void set(const char *funcName, const void *startAddr, size_t funcSize) const + { + if (mode_ == None) return; +#if !defined(XBYAK_USE_PERF) && !defined(XBYAK_USE_VTUNE) + (void)funcName; + (void)startAddr; + (void)funcSize; +#endif +#ifdef XBYAK_USE_PERF + if (mode_ == Perf) { + if (fp_ == 0) return; + fprintf(fp_, "%llx %zx %s%s", (long long)startAddr, funcSize, funcName, suffix_); + /* + perf does not recognize the function name which is less than 3, + so append '_' at the end of the name if necessary + */ + size_t n = strlen(funcName) + strlen(suffix_); + for (size_t i = n; i < 3; i++) { + fprintf(fp_, "_"); + } + fprintf(fp_, "\n"); + fflush(fp_); + } +#endif +#ifdef XBYAK_USE_VTUNE + if (mode_ != VTune) return; + char className[] = ""; + char fileName[] = ""; + iJIT_Method_Load jmethod = {}; + jmethod.method_id = iJIT_GetNewMethodID(); + jmethod.class_file_name = className; + jmethod.source_file_name = fileName; + jmethod.method_load_address = const_cast(startAddr); + jmethod.method_size = funcSize; + jmethod.line_number_size = 0; + char buf[128]; + snprintf(buf, sizeof(buf), "%s%s", funcName, suffix_); + jmethod.method_name = buf; + iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, (void*)&jmethod); +#endif + } + /* + for continuous set + funcSize = endAddr - + */ + void set(const char *funcName, const void *endAddr) + { + set(funcName, startAddr_, (size_t)endAddr - (size_t)startAddr_); + startAddr_ = endAddr; + } +}; +#endif // XBYAK_ONLY_CLASS_CPU + +} } // end of util + +#endif diff --git a/wx_key/wx_key.sln b/wx_key/wx_key.sln new file mode 100644 index 0000000..c9ef2eb --- /dev/null +++ b/wx_key/wx_key.sln @@ -0,0 +1,31 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.14.36414.22 d17.14 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "wx_key", "wx_key.vcxproj", "{0893DC21-164E-4EBE-8717-1B33029D0C56}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {0893DC21-164E-4EBE-8717-1B33029D0C56}.Debug|x64.ActiveCfg = Debug|x64 + {0893DC21-164E-4EBE-8717-1B33029D0C56}.Debug|x64.Build.0 = Debug|x64 + {0893DC21-164E-4EBE-8717-1B33029D0C56}.Debug|x86.ActiveCfg = Debug|Win32 + {0893DC21-164E-4EBE-8717-1B33029D0C56}.Debug|x86.Build.0 = Debug|Win32 + {0893DC21-164E-4EBE-8717-1B33029D0C56}.Release|x64.ActiveCfg = Release|x64 + {0893DC21-164E-4EBE-8717-1B33029D0C56}.Release|x64.Build.0 = Release|x64 + {0893DC21-164E-4EBE-8717-1B33029D0C56}.Release|x86.ActiveCfg = Release|Win32 + {0893DC21-164E-4EBE-8717-1B33029D0C56}.Release|x86.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {329E540D-BE3E-4616-A973-AB86D6A3EAD2} + EndGlobalSection +EndGlobal diff --git a/wx_key/wx_key.vcxproj b/wx_key/wx_key.vcxproj new file mode 100644 index 0000000..6ca95da --- /dev/null +++ b/wx_key/wx_key.vcxproj @@ -0,0 +1,180 @@ + + + + + Debug + Win32 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + 17.0 + Win32Proj + {0893dc21-164e-4ebe-8717-1b33029d0c56} + wxkey + 10.0 + + + + DynamicLibrary + true + v145 + Unicode + + + DynamicLibrary + false + v145 + true + Unicode + + + DynamicLibrary + true + v145 + Unicode + + + DynamicLibrary + false + v145 + true + Unicode + + + + + + + + + + + + + + + + + + + + + Level3 + true + WIN32;_DEBUG;WXKEY_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions) + true + Use + pch.h + $(SolutionDir)vendor\xbyak;$(SolutionDir)vendor\MinHook_134_lib\include;%(AdditionalIncludeDirectories) + /utf-8 %(AdditionalOptions) + + + Windows + true + false + + + + + Level3 + true + true + true + WIN32;NDEBUG;WXKEY_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions) + true + Use + pch.h + $(SolutionDir)vendor\xbyak;$(SolutionDir)vendor\MinHook_134_lib\include;%(AdditionalIncludeDirectories) + /utf-8 %(AdditionalOptions) + + + Windows + true + false + + + + + Level3 + true + _DEBUG;WXKEY_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions) + true + NotUsing + pch.h + $(SolutionDir)vendor\xbyak;$(SolutionDir)vendor\MinHook_134_lib\include;%(AdditionalIncludeDirectories) + /utf-8 %(AdditionalOptions) + + + Windows + true + false + $(SolutionDir)vendor\MinHook_134_lib\lib;%(AdditionalLibraryDirectories) + libMinHook.x64.lib;%(AdditionalDependencies) + + + + + Level3 + true + true + true + NDEBUG;WXKEY_EXPORTS;_WINDOWS;_USRDLL;%(PreprocessorDefinitions) + true + NotUsing + pch.h + $(SolutionDir)vendor\xbyak;$(SolutionDir)vendor\MinHook_134_lib\include;%(AdditionalIncludeDirectories) + /utf-8 %(AdditionalOptions) + + + Windows + true + false + $(SolutionDir)vendor\MinHook_134_lib\lib;%(AdditionalLibraryDirectories) + libMinHook.x64.lib;$(CoreLibraryDependencies);%(AdditionalDependencies) + + + + + + + + + + + + + + + + + + + + + + + + + + + Create + Create + Create + Create + + + + + diff --git a/wx_key/wx_key.vcxproj.filters b/wx_key/wx_key.vcxproj.filters new file mode 100644 index 0000000..b61d651 --- /dev/null +++ b/wx_key/wx_key.vcxproj.filters @@ -0,0 +1,84 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + 头文件 + + + + + 源文件 + + + 源文件 + + + 源文件 + + + 源文件 + + + 源文件 + + + 源文件 + + + 源文件 + + + 源文件 + + + 源文件 + + + 源文件 + + + diff --git a/wx_key/wx_key.vcxproj.user b/wx_key/wx_key.vcxproj.user new file mode 100644 index 0000000..88a5509 --- /dev/null +++ b/wx_key/wx_key.vcxproj.user @@ -0,0 +1,4 @@ + + + + \ No newline at end of file