写了个Sunday算法的实现....
本帖最后由 F8LEFT 于 2015-6-11 22:03 编辑如题,以前就知道了Sunday这个算法,只是一直没有弄出来,所以现在有点时间了就顺便写了一下了。挺简单但是高效的一个算法,有兴趣的可以去了解一下,原理也有写明在代码中了。
2015.6.4 更新Hex查找,现在可以使用通配符?了,注意一下,使用通配符会降低匹配速度,选用通配符的位置值得注意一下。通常来说通配符放的越后效率越低。另外,选取的子字符串长度越长,效率越高。代码测试还不完全,可能有bug请注意。
另外,本例代码未经优化,仅供参考。所以追求效率的同学请右上角百度一下。
2015.6.7 更新内存搜索,用法与SundayHex函数一样,搜索自身内存数据。唔,暂时还想不到还有什么内容要更新了那么就暂时先这样吧。
2015.6.11 内存搜索再次更新Ex版本,应该会快一点,另外修复了某些bug,此外的便是SundayF算法正式停用了,恩,估计实用价值可能不太高。
FSunday.h
#ifndef FSUNDAY_H_INCLUDED
#define FSUNDAY_H_INCLUDED
#include <windows.h>
namespace FSunday{
int Sunday(char* Src, unsigned long dwSrcLen, char* Sub, unsigned long dwSubLen); //Sunday算法,快速查找Src中Sub的位置,下标以0为开始,失败返回-1
//消耗一定量的空间来生成位置表,来提高匹配效率
//int SundayF(char* Src, unsigned long dwSrcLen, char* Sub, unsigned long dwSubLen); //原始Sunday算法
int SundayB(char* Src, unsigned long dwSrcLen, char* Sub, unsigned long dwSubLen); //Byte
int SundayW(char* Src, unsigned long dwSrcLen, char* Sub, unsigned long dwSubLen); //Word
int SundayDW(char* Src, unsigned long dwSrcLen, char* Sub, unsigned long dwSubLen); //DWORD
int SundayHex(char* Src, unsigned long dwSrcLen, char* Sub); //Sub均16进制代码,以0为结束。支持通配符?, 比如 "2F3C??4D5E",长度必须为2的倍数
#ifdef _WINDOWS_ //#include <windows.h>后可用
DWORD SundayMem(LPVOID lpMemAddr, unsigned int dwMemLen, char* Sub); //内存扫描,支持通配符,用法类似于SundayHex
DWORD SundayMemEx(LPVOID lpMemAddr, unsigned int dwMemLen, char* Sub); //内存扫描,加速版。不一定准确?
#endif
//private: 注意,永远不要手动调用下面的函数
bool __SundayHexInit__(char* Sub, unsigned long*p, char* HexSub, unsigned long dwSubLen);
int __SundayHex__(char* Src, unsigned long dwSrcLen, char* Sub, unsigned long* p, char* HexSub, unsigned long dwSubLen);
//end
};
#endif // FSUNDAY_H_INCLUDED
FSunday.cpp
/*
Sunday算法,是对字符串匹配算法的一种改进
改进的地方体现在字符串比较方法与移动方法上面
比较就不说了,就说说移动方法,有两种情况,见例子
一轮比较后,进行字符串移动
情况1:
Src:1234567890
Sub:125
下一轮是用6与Sub中的字符比较,因为6不在Sub中,所以Sub的位置肯定比6大,移动如下
Src:1234567890
Sub: 125
恰好移动过了6,开始下一轮比较。
情况2:
Src:1234567890
Sub:165
下一轮用6与Sub中的字符比较(从右端开始),因为6在Sub中,故进行右端的对齐
Src:1234567890
Sub: 165
对齐后,在开始下一轮比较。
这样增加了移动的效率,从而提高了匹配的速度。
本例代码的改进主要在于移动时添加了一项表来计算移动步长,从而不用每次移动时重新计算步长。
*/
#include "FSunday.h"
#include <iostream>
typedef unsigned long DWORD;
typedef unsigned short WORD;
typedef unsigned char BYTE;
bool FHexCharValid(char c)
{
if(c >= '0' && c <= '9' ||
c >= 'A' && c <= 'F' ||
c >= 'a' && c <= 'f' ||
c == '?')
return true;
else
return false;
}
bool FHexDecoder(char* Dec, char* Src)
{
char HighC, LowC;
DWORD dwSrcLen = strlen(Src) / 2;
int i;
for(i = 0; i < dwSrcLen; i++) {
HighC = Src, LowC = Src;
if(!FHexCharValid(LowC) || !FHexCharValid(HighC))
return false;
HighC -= '0';
if(HighC > 9) HighC -= 7;
if(HighC > 0xf) HighC -= 0x20;
LowC -= '0';
if(LowC > 9) LowC -= 7;
if(LowC > 0xf) LowC -= 0x20;
Dec = (HighC<<4)|LowC;
}
return true;
}
int FSunday::Sunday(char* Src, unsigned long dwSrcLen, char* Sub, unsigned long dwSubLen)
{
if(dwSubLen == 0)
return -1;
//if(dwSubLen <= 0xF)
// return SundayF(Src, dwSrcLen, Sub, dwSubLen);
//if(dwSubLen <= 0xFF)
// return SundayB(Src, dwSrcLen, Sub, dwSubLen);
if(dwSubLen <= 0xFFFF)
return SundayW(Src, dwSrcLen, Sub, dwSubLen);
if(dwSubLen <= 0xFFFFFFFF)
return SundayDW(Src, dwSrcLen, Sub, dwSubLen);
return -1;
}
/*
int FSunday::SundayF(char* Src, unsigned long dwSrcLen, char* Sub, unsigned long dwSubLen)
{
DWORD j, k;
j = dwSubLen - 1;
bool bContinue = true;
bool bSuccess;
while (bContinue) {
bSuccess = true;
for(k = 0; k < dwSubLen; k++) {
if(Src != Sub) {
bSuccess = false;
break;
}
}
if(bSuccess)
bContinue = false;
else { //移动j指针
for(k = 0; k < dwSubLen; k++) {
if(Src == Sub) {
break;
}
}
j += k+1;
}
if(j >= dwSrcLen)
break;
}
if(j < dwSrcLen)
return j-dwSubLen+1;
else
return -1;
}*/
int FSunday::SundayB(char* Src, unsigned long dwSrcLen, char* Sub, unsigned long dwSubLen)
{
BYTE* p = new BYTE; //table P,标志距离
DWORD i; DWORD j, k;
// memset(p, dwSubLen, sizeof(BYTE)*0xFF);
for(i = 0; i < 0x100; i++) {
p = dwSubLen;
}
for(i = 0; i < dwSubLen; i++) { //扫描Sub,初始化 P 表
p[(BYTE)Sub] = dwSubLen - i - 1; //memset
}
//开始配对字符串
//j为 Sub位置指标, k为 当前匹配位置
j = dwSubLen - 1; //初始化位置为 dwSubLen - 1,匹配顺序为从右到左
bool bContinue = true;
bool bSuccess;
while(bContinue) {
bSuccess = true;
for(k = 0; k < dwSubLen; k++) {
if(Src != Sub) {
bSuccess = false;
break;
}
}
if(bSuccess)
bContinue = false;
else { //移动j指针
if(j < dwSrcLen - 1)
j += p[(BYTE)Src] + 1;
else j++;
}
if(j >= dwSrcLen)
break;
}
delete []p;
if(j < dwSrcLen)
return j-dwSubLen+1;
else
return -1;
}
int FSunday::SundayW(char* Src, unsigned long dwSrcLen, char* Sub, unsigned long dwSubLen)
{
WORD* p = new WORD; //table P,标志距离
DWORD i; DWORD j, k;
for(i = 0; i < 0x100; i++) {
p = dwSubLen;
}
for(i = 0; i < dwSubLen; i++) { //扫描Sub,初始化 P 表
p[(BYTE)Sub] = dwSubLen - i;
}
//开始配对字符串
//j为 Sub位置指标, k为 当前匹配位置
j = dwSubLen - 1; //初始化位置为 dwSubLen - 1,匹配顺序为从右到左
bool bContinue = true;
bool bSuccess;
while(bContinue) {
bSuccess = true;
for(k = 0; k < dwSubLen; k++) {
if(Src != Sub) {
bSuccess = false;
break;
}
}
if(bSuccess)
bContinue = false;
else { //移动j指针
if(j < dwSrcLen-1)
j += p[(BYTE)Src];
else j++;
}
if(j >= dwSrcLen)
break;
}
delete []p;
if(j < dwSrcLen)
return j-dwSubLen+1;
else
return -1;
}
int FSunday::SundayDW(char* Src, unsigned long dwSrcLen, char* Sub, unsigned long dwSubLen)
{
DWORD* p = new DWORD; //table P,标志距离
DWORD i; DWORD j, k;
for(i = 0; i < 0x100; i++) {
p = dwSubLen;
}
for(i = 0; i < dwSubLen; i++) { //扫描Sub,初始化 P 表
p[(BYTE)Sub] = dwSubLen - i;
}
//开始配对字符串
//j为 Sub位置指标, k为 当前匹配位置
j = dwSubLen - 1; //初始化位置为 dwSubLen - 1,匹配顺序为从右到左
bool bContinue = true;
bool bSuccess;
while(bContinue) {
bSuccess = true;
for(k = 0; k < dwSubLen; k++) {
if(Src != Sub) {
bSuccess = false;
break;
}
}
if(bSuccess)
bContinue = false;
else { //移动j指针
if(j < dwSrcLen - 1)
j += p[(BYTE)Src];
else j++;
}
if(j >= dwSrcLen)
break;
}
delete []p;
if(j < dwSrcLen)
return j-dwSubLen+1;
else
return -1;
}
int FSunday::SundayHex(char* Src, unsigned long dwSrcLen, char* Sub)
{
DWORD dwSubLen = strlen(Sub);
if(dwSubLen % 2) //长度必须为2的倍数
return -1;
dwSubLen /= 2;
char* HexSub = new char;
DWORD* p = new DWORD; //table P,标志距离
int i = -1;
if(FSunday::__SundayHexInit__(Sub, p, HexSub, dwSubLen)) {
i = FSunday::__SundayHex__(Src, dwSrcLen, Sub, p, HexSub, dwSubLen);
}
delete []p;
delete []HexSub;
return i;
}
bool FSunday::__SundayHexInit__(char* Sub, DWORD*p, char* HexSub, unsigned long dwSubLen)
{
if(!FHexDecoder(HexSub, Sub)) {
return false;
}
DWORD i;
for(i = 0; i < 0x100; i++) {
p = -1;
}
int WildAddr = 0;
for(i = 0; i < dwSubLen; i++) {
if(Sub == '?')
WildAddr = i;
}
for(i = WildAddr+1; i < dwSubLen; i++) { //扫描Sub,初始化 P 表
p[(BYTE)HexSub] = dwSubLen - i;
}
for(i = 0; i < 0x100; i++) {
if(p == -1)
p = dwSubLen - WildAddr;
}
return true;
}
int FSunday::__SundayHex__(char* Src, unsigned long dwSrcLen, char* Sub, DWORD* p, char* HexSub, DWORD dwSubLen)
{
//开始配对字符串
//j为 Sub位置指标, k为 当前匹配位置
DWORD j, k;
j = dwSubLen - 1; //初始化位置为 dwSubLen - 1,匹配顺序为从右到左
bool bContinue = true;
bool bSuccess;
while(bContinue) {
bSuccess = true;
for(k = 0; k < dwSubLen; k++) {
if(Sub[(dwSubLen - k - 1)*2] != '?' && Src != HexSub){
bSuccess = false;
break;
}
}
if(bSuccess)
bContinue = false;
else { //移动j指针
if(j < dwSrcLen -1) //防止j+1 >= dwSrcLen造成溢出
j += p[(BYTE)Src];
else j++;
}
if(j >= dwSrcLen)
break;
}
if(j < dwSrcLen)
return j-dwSubLen+1;
else
return -1;
}
#ifdef _WINDOWS_
#define PageSize 0x1000
DWORD FSunday::SundayMem(LPVOID lpMemAddr, unsigned int dwMemLen, char* Sub)
{
DWORD dwSubLen = strlen(Sub);
if(dwSubLen % 2) //长度必须为2的倍数
return -1;
dwSubLen /= 2;
char* HexSub = new char;
DWORD* p = new DWORD; //table P,标志距离
int i = -1;
if(FSunday::__SundayHexInit__(Sub, p, HexSub, dwSubLen)) {
UINT RPage; //当前已读取的数据 * PageSize
char* rPoint;
DWORD offset = -1;
if(RPage = (DWORD)lpMemAddr % PageSize) {
RPage = PageSize - RPage;
if(!IsBadReadPtr(lpMemAddr, 1)) {
rPoint = (char*)lpMemAddr;
offset = FSunday::__SundayHex__(rPoint, RPage, Sub, p, HexSub, dwSubLen);
}
if(offset != -1) {
delete []p;
delete []HexSub;
return offset;
}
}
for(; RPage < dwMemLen; RPage += PageSize) {
rPoint = (char*)((DWORD)lpMemAddr + RPage);
if(!IsBadReadPtr(rPoint, 1)) {
offset = FSunday::__SundayHex__(rPoint, PageSize, Sub, p, HexSub, dwSubLen);
}
if(offset != -1) {
delete []p;
delete []HexSub;
return offset + RPage;
}
}
}
delete []p;
delete []HexSub;
return -1;
}
DWORD FSunday::SundayMemEx(LPVOID lpMemAddr, unsigned int dwMemLen, char* Sub)
{
DWORD dwSubLen = strlen(Sub);
if(dwSubLen % 2) //长度必须为2的倍数
return -1;
dwSubLen /= 2;
char* HexSub = new char;
DWORD* p = new DWORD; //table P,标志距离
int i = -1;
if(FSunday::__SundayHexInit__(Sub, p, HexSub, dwSubLen)) {
DWORD offset = -1;
MEMORY_BASIC_INFORMATION meminfo;
LPVOID ptr = (LPVOID)0xFFFFFFFF; //LoopAddr
//①
if(VirtualQuery(lpMemAddr, &meminfo, sizeof(MEMORY_BASIC_INFORMATION))) {
if(!IsBadReadPtr(lpMemAddr, 1)) {
ptr = (LPVOID)((DWORD)meminfo.BaseAddress + meminfo.RegionSize);
offset = FSunday::__SundayHex__((PCHAR)lpMemAddr, (DWORD)ptr - (DWORD)lpMemAddr, Sub, p, HexSub, dwSubLen);
}
if(offset != -1) {
delete []p;
delete []HexSub;
return offset;
}
}
while ((DWORD)ptr - (DWORD)lpMemAddr < dwMemLen)
{
if(VirtualQuery(ptr, &meminfo, sizeof(MEMORY_BASIC_INFORMATION))) {
if(!IsBadReadPtr(ptr, 1)) {
offset = FSunday::__SundayHex__((PCHAR)meminfo.BaseAddress, meminfo.RegionSize, Sub, p, HexSub, dwSubLen);
}
if(offset != -1) {
delete []p;
delete []HexSub;
return offset + (DWORD)meminfo.BaseAddress - (DWORD)lpMemAddr;
}
ptr = (LPVOID)((DWORD)meminfo.BaseAddress + meminfo.RegionSize);
} else {
break;
}
}
}
delete []p;
delete []HexSub;
return -1;
}
#endif // _WINDOWS_
main.cpp
#include <iostream>
#include <string>
#include <fstream>
#include <time.h>
#include "FSunday.h"
using namespace std;
//char Sub[] = "Wtf,I Fuck You!!!";
char SubHex[] = "6A00E852F1FFFF8BF085F6742964A118000000817834B7000000751EFF750C56E8E6F1FFFF85C074113D80000000740A56E81EE9FFFF33C0EB028BC68B4DFC33";
int memfind(const char *mem, const char *str, int sizem, int sizes);
int main()
{
//char Src[] = "fgjsderiodfda1546egdjkjdkfdadghh";
//char Sub[] = "fda";
/*char Src[] = {
0xBC, 0xBC, 0xBC, 0xBC, 0xBC, 0xBC, 0x1C, 0x65, 0x1F, 0x65, 0x1F, 0x65, 0xD1, 0x6D, 0xF1, 0xB6,
0x5A, 0xDF, 0x16, 0xD5, 0xF1, 0xA6, 0x5F, 0x4E, 0x91, 0xDF, 0xB6, 0x51, 0xD6, 0xF5, 0x1A, 0x65,
0xF1, 0x65, 0xBD, 0xF1, 0x65, 0x1F, 0x65, 0x16, 0xB5, 0x1F, 0xB6, 0x51, 0x65, 0x1E, 0x91, 0xDF,
0x65, 0x89, 0x76, 0x51, 0x6F, 0x19, 0x84, 0xDF, 0x65, 0x1B, 0x6D, 0xF5, 0x1B, 0x6F, 0x8B, 0x4A,
0xE9, 0x6F, 0x1B, 0x6D, 0xC5, 0xB1, 0x98, 0x76, 0x51, 0xD6, 0x57, 0x49, 0x8E, 0xA5, 0x61, 0xB9,
0x81, 0x65, 0x1E, 0x41, 0xB6, 0x5F, 0x1D, 0x65, 0x1D, 0x64, 0x8D, 0x91, 0x65, 0xD1, 0x65, 0x6D
};
char Sub[] = {0x65,0x1F, 0x65};*/
ifstream ifile("F:\\新番\\xxoo.exe", ios::binary);
ifile.seekg(0, ios::end);
unsigned int szSrcLen = ifile.tellg();
ifile.seekg(0, ios::beg);
char* Src = new char;
ifile.read(Src, szSrcLen);
ifile.close();
char Sub[] = {
0x9C, 0xBE, 0xD5, 0xC0, 0x48, 0xC3, 0x6A, 0xC5, 0x5F, 0xC6, 0x4C, 0xC5, 0xD3, 0xC2, 0x48, 0xC0,
0x49, 0xBE, 0x20, 0xBD, 0xFA, 0xBC, 0x95, 0xBD, 0xFA, 0xBE, 0xEC, 0xC0, 0x96, 0xC1, 0x8A, 0xBF,
0x2C, 0xBC, 0xF7, 0xB9, 0xDC, 0xB9, 0x00, 0xBC, 0x38, 0xC0, 0x17, 0xC5, 0x1A, 0xC9, 0xB1, 0xCB,
0x44, 0xCB, 0x82, 0xC7, 0x2C, 0xC4, 0x34, 0xC3, 0x5C, 0xC2, 0xF3, 0xC0, 0x51, 0xC0, 0xB7, 0xBF
};
char Sub2[] = "9CBED5C048C36AC55FC64CC5D3C248C049BE20BDFABC95BDFABEECC096C18ABF2CBCF7B9DCB900BC38C017C51AC9B1CB44CB82C72CC434C35CC2F3C051C0B7BF";
unsigned int dwTimeNew, dwTimeOld;
dwTimeNew = clock();
cout<<FSunday::Sunday(Src, szSrcLen, Sub, sizeof(Sub))<<endl;
dwTimeOld = clock();
cout<<"<FSunday::Sunday,耗时"<<dwTimeOld - dwTimeNew<<"ms"<<endl;
/*dwTimeNew = clock();
cout<<FSunday::SundayF(Src, szSrcLen, Sub, sizeof(Sub))<<endl;
dwTimeOld = clock();
cout<<"<FSunday::SundayF,耗时"<<dwTimeOld - dwTimeNew<<"ms"<<endl;*/
dwTimeNew = clock();
cout<<FSunday::SundayB(Src, szSrcLen, Sub, sizeof(Sub))<<endl;
dwTimeOld = clock();
cout<<"<FSunday::SundayB,耗时"<<dwTimeOld - dwTimeNew<<"ms"<<endl;
dwTimeNew = clock();
cout<<FSunday::SundayW(Src, szSrcLen, Sub, sizeof(Sub))<<endl;
dwTimeOld = clock();
cout<<"<FSunday::SundayW,耗时"<<dwTimeOld - dwTimeNew<<"ms"<<endl;
dwTimeNew = clock();
cout<<FSunday::SundayDW(Src, szSrcLen, Sub, sizeof(Sub))<<endl;
dwTimeOld = clock();
cout<<"<FSunday::SundayDW,耗时"<<dwTimeOld - dwTimeNew<<"ms"<<endl;
dwTimeNew = clock();
cout<<FSunday::SundayHex(Src, szSrcLen, Sub2)<<endl;
dwTimeOld = clock();
cout<<"<FSunday::SundayHex,耗时"<<dwTimeOld - dwTimeNew<<"ms"<<endl;
dwTimeNew = clock();
cout<<memfind(Src, Sub, szSrcLen, sizeof(Sub))<<endl;
dwTimeOld = clock();
cout<<"memfind,耗时"<<dwTimeOld - dwTimeNew<<"ms"<<endl;
dwTimeNew = clock();
cout<<(hex)<<"0x"<<FSunday::SundayMemEx((LPVOID)((DWORD)GetModuleHandle(NULL)), 0xFFFFFFFF, SubHex)<<endl;
dwTimeOld = clock();
cout<<(dec)<<"FSunday::SundayMemEx,耗时"<<dwTimeOld - dwTimeNew<<"ms"<<endl;
dwTimeNew = clock();
cout<<(hex)<<"0x"<<FSunday::SundayMem((LPVOID)((DWORD)GetModuleHandle(NULL)), 0xFFFFFFFF, SubHex)<<endl;
dwTimeOld = clock();
cout<<(dec)<<"FSunday::SundayMem,耗时"<<dwTimeOld - dwTimeNew<<"ms"<<endl;
delete []Src;
cout << "Hello world!" << endl;
system("pause");
return 0;
}
int memfind(const char *mem, const char *str, int sizem, int sizes)
{
int da,i,j;
if (sizes == 0) da = strlen(str);
else da = sizes;
for (i = 0; i < sizem; i++)
{
for (j = 0; j < da; j ++)
{
if (mem != str)
break;
}
if (j == da)
return i;
}
return -1;
}另外附件为vs2012工程,再次附上。
F8师傅八七威武,弱弱滴问一下:这洋码子打的不错,就是不知道写的啥............... 看不懂,只有支持大神F8了,膜拜大婶 本帖最后由 wx_f1Jji177 于 2015-4-14 23:26 编辑
换换脑子,试了一下,找到首次出现的位置
太专业了,只能支持F8!!! 本帖最后由 heihu 于 2015-4-15 00:43 编辑
经测试,无法查找文件内的字符串,测试文件为视频文件,大小132M,是不是不能用于文件查找? 留个脚印 等用的时候来研究 {:soso_e179:} 好像见过 不过不会。谢谢F8老师的教育 膜拜我F8师傅.. 使用F8老师的Sunday算法还是没法查找到文件字符串,使用古老的方法到是可以找到的,请F8老是指点int memfind(const char *mem, const char *str, int sizem, int sizes)
{
int da,i,j;
if (sizes == 0) da = strlen(str);
else da = sizes;
for (i = 0; i < sizem; i++)
{
for (j = 0; j < da; j ++)
{
if (mem != str)
break;
}
if (j == da)
return i;
}
return -1;
}