hellovfp的个人资料

libjpeg-turbo解码器1.4.90 根据上一篇关于libjpeg的dll编译一文，将手里以前下载的libjpeg-turbo 1.4.90项目中解码器部份提取出来，生成了静态库和动态库，通过了msvc和mingw编译，对图片进行解码测试，没有发现有什么问题，速度还算不错，生成的Dll大概是108K左右。（1.5.3版在mingw下还有编译错误，和limits.h有关）

关于MSVC如何编译IJG的libjpeg库生成DLL的方法我们都知道IJG的这个著名的jpeg图像库，按照自带的说明编译，一般得到的是静态链结库LIB文件，网上多的是编译过程的文章，就不多说了，以6b版为例，如何生成一个jpeg.dll呢？估计大多数人找不到相关的资料。我也是摸索了一阵子，终于搞定了这个问题，将里面的解码部分的文件提取出来，经过Mingw的gcc编译，最终生成了一个59K的dll，可解base line和progressive的jpeg文件。

一个比较快的JPEG解码器，700多行原代码来自codeproject网上，国内的一个博士根据IGJ的jpeg库简写的代码，接口简单。我花了两天，对代码进一步进行了改进，进一步提升了解码速度。项目下载地址在二楼：

关于C/C++代码注释统计问题一、代码注释统计问题，算是一个学习过程中常见的一个题目，在cnblog上也有网友弄了一段C++的统计代码，详见：http://tieba.baidu.com/mo/q/checkurl?url=https%3A%2F%2Fwww.cnblogs.com%2Fnchar%2Fp%2F3915889.html&urlrefer=28dbf11106254afb125cf7851f806257。我拿来编译了一下，gcc和vc可以编译，但vc的debug模式下，无法运行，有越界的异常抛出，其实博主关于这个问题的分析还是写得不错的，基本上该思考的地方都到了，但不幸的是，对于其它源代码的统计，明显是错的。二、又顺手把以前的vckbase代码仓库里的一个代码统计和网上以前下的一个叫源代码智能统计专家拿来测试了一下，随手从7-zip和libpng老外的源代码中选了几个行数较多的源代码，测试结果让人大跌眼镜，拿notepad++手动数了一下注释和空行，vckbase里的除了空行统计有一行的小误差外，其它是正常的，但由于使用的是vc6的MFC+第三方MFC类做的界面，多最小化几次，界面就会出现bug，按钮居然画出到桌面上去了，晕，而另一个软件统计专家统计出来的结果是错误的，虽然界面没有问题。三、我又仔细读了一下上面那篇博客上的分析，最后写了一段统计代码，经过测试，统计结果终于是正确的了。由于代码使用了以前写的文件内存映射代码，采用特殊的行读取方式，所以整个统计过程超快，老外的5900多行的代码统计基本不耗时，爽啊。一段典型的可能出现各种各样注释的测试文件：（花样百出，但可以通过编译）样本： //dam it */ /**//* this is muti comment * /" // ; blank in comment // /* */ # define BB "gss" // bug line // /**/ // single comment /* single " comment */ /* this is second muti commnet hello keitty */ #include <stdio.h> //this can be? #include<windows.h> int main() { char s[] = /* abc */ "http:://http://tieba.baidu.com/mo/q/checkurl?url=http%3A%2F%2Fwww.hellovfp.com&urlrefer=14dbf09edfd7426c0f10d296b6e7d45c \" \ / can you see me\ // \ /*bug here*/ "; /*// */ char ch = '"'; char chs = '\"'; /*bug*/ char ss[] /*bug*/ = BB; /* inline muti comment */ puts("/*"); char ds[] = "f\ " ; puts("*/"); char gs[] = "..//happy line\ //"; // char bs[][15] = { "//bug1", "//* bug3 */", //here " "/*bug2*/" }; puts(s); puts(ds); // "inline comment" // // ; return 1; /* inline "can you see me?" "disp commnet */ HWND hwnd = CreateWindowA("edit", //class name "//have bug", //window name"; WS_CHILD, /* "style" */ 0, 0, 100, 100, /**/ NULL, NULL, NULL, NULL // /* );//shell return 0; /*/ "can you see me?" commnet */ return 0; /*two*/ /* "can you see me?" commnet */ } 你也可以自己写一段代码来测试一下上面这个样本文件，测试结果是总行数71, 代码行 34, 注释行 38, 空行15。我的统计代码如下： // linux 文本行结尾是0x0a, windows是 0x0d, 0x0a // 还有一个文本转换问题 // 单行读取方式和整块判断。 // 使用特殊的行读取方式，line_start, line_end两个指针返回一行，去首尾空格，制表符等也是。 #include <stdio.h> #include <time.h> #include "File.h" #include "Comm_func.h" #include "FileFinder.h" struct TextLine { char *begin; //一行开始 char *end; //一行结束 }; struct Count { int total; //总行数 int comment; // 注释行计数（不含代码） int marks; // 行内注释（有代码含注释） int blanks; // 空白行数 bool flag1; bool flag2; }; bool read_line(TextLine & tl) { if(tl.end == 0){ tl.end = tl.begin; } else{ if(*tl.end == 0) return false; tl.begin = ++tl.end; } while(*tl.end && *tl.end != 0xa) tl.end++; return true; } void put_line(TextLine l) { while(l.begin <= l.end) putchar(*l.begin++); } template<typename T> inline bool is_space(T ch) { return (ch == '\n' || ch == '\r' || ch == '\t' || ch == ' '); } template<typename T> bool have_ch(const T* beg, const T* end, T ch) { for(;beg < end;++beg, --end){ if(*beg == ch || *end == ch ) return true; } return false; } bool have_key(char *beg, char *end) { for(;beg < end; ++beg){ if(*beg == '/' && (beg[1] == '/' || beg[1] == '*')){ return true; } if(*beg == '*' && beg[1] == '/' || *beg == '"'){ return true; } } return false; } int analyse(TextLine line, Count& cnt) { char *beg = line.begin, *end = line.end; // 过滤左边空格字符 while(beg < end && is_space(*beg)){ ++beg; } // 如果是空行 /* */内的空白行也计算在内 if(beg == end){ ++cnt.blanks; return 0; } if(cnt.flag1 && !find(beg, end, "*/")){ //处理块注释 ++cnt.comment; return 1; } if(!cnt.flag1 && !cnt.flag2 && *beg == '/' && beg[1] == '/'){ //处理"//"开头的注释行 ++cnt.comment; return 1; } if(cnt.flag2 && !have_ch(beg, end, '"') || !have_key(beg, end)){ //过滤掉不含注释符或是引号的行 return 4; } //put_line(line); char state = 0; for(;beg < end; ++beg) { if(beg[1]=='"' && *beg =='\'' || *beg == '\\' ){//读取到\"或是\\"时跳过 ++beg; continue; } if(!cnt.flag2){ //处理注释符部分 if((!cnt.flag1 && *beg=='/' && beg[1] == '*') || (cnt.flag1 && *beg == '*' && beg[1] == '/')){ cnt.flag1 = ! cnt.flag1; state |= 1; ++beg; continue; } if(!cnt.flag1 && *beg == '/' && beg[1] == '/'){ //有//注释时跳出 state |= 1; break; } } if(!cnt.flag1 && *beg == '"'){ //读取引号 cnt.flag2 = !cnt.flag2; continue; } if(!(state & 2) && !cnt.flag1){ if(!is_space(*beg)) state |= 2; } }// end for if(state & 1){//不如把2和1合并了 if(state & 2){ ++cnt.marks; return 2; } ++cnt.comment; return 1; } return 4; } int main() { FileFinder finder; if(!finder.find(L"*")) return 1; do{ if(finder.is_dir()) continue; utf16 *p = rfind(finder.name, finder.name+diy::strlen(finder.name), L'.'); if(istrcmp(p, L".c")==0 || istrcmp(p, L".cpp")==0){ sys::File file; file.open(finder.name); //不同可能少了 if(!file.is_open()) continue; TextLine line = {file.read()}; Count cnt= {0}; clock_t start = clock(); while(read_line(line)) { ++cnt.total; switch(analyse(line, cnt)) { case 1://单行 //put_line(line); break; case 2: // 行内 //put_line(line); break; } } double time = clock() -start; wprintf(L"%s\n", finder.name); printf("run time is %.3f\n", time / 1000); printf("all line is %d, blank=%d, comment=%d, marks=%d\n", cnt.total, cnt.blanks, cnt.comment, cnt.marks); printf("all=%d, code=%d, comm=%d, blank=%d\n", cnt.total, cnt.total-cnt.comment-cnt.blanks, cnt.comment + cnt.marks, cnt.blanks); puts("-----------------------------------"); } }while(finder.next()); return 0; // this is the basic comment line } 运行结果：整个操作就是两指针在内存中一阵操作，然后结果就出来了，主分析代码函数只有69行，爽吧？

无聊，写了一个暗黑2小工具 C/C++混合编程，编译后不依赖C++库。采用多线程进行目录搜索，除了界面，其它基本上都是手工代码。链接: http://tieba.baidu.com/mo/q/checkurl?url=https%3A%2F%2Fp&urlrefer=336a56de667d80856318554db2702361 a n.baidu.com/s/14TkYWL64yJYbfiowFpn-FQ 提取码: 293j

文件搜索与匹配算法中一个诡异问题的解决受够了win8文件查找的不爽，只好自己实现文件和文本内容搜索，写好了主要算法结构，结果无意中在XP中测试时出现一个诡异的bug，在调试时，单步调试时，会在扫描文件函数调用时挂掉，但不调试，直接运行又能成功运行，在win8中又不存在这个问题，代码如下： class FileInfo { HANDLE hFind; typedef WIN32_FIND_DATAA finfo; typedef SYSTEMTIME ftime; public: finfo info; ftime time; ~FileInfo() { if(hFind!= INVALID_HANDLE_VALUE) FindClose(hFind);//不要二次调用，会引起死循环的 } //文件搜索 bool find(const char* findstr) { hFind = FindFirstFileA(findstr, &info); return hFind != INVALID_HANDLE_VALUE; } bool next() { return FindNextFileA(hFind, &info) > 0; } //文件时间转换 void create_time() { FileTimeToSystemTime(&info.ftCreationTime, &time); } void access_time() { FileTimeToSystemTime(&info.ftLastAccessTime, &time); } void write_time() { FileTimeToSystemTime(&info.ftLastWriteTime, &time); } void close() { FindClose(hFind); } bool is_dir() { return (info.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)>0; } }; 扫描函数： void scan_dir2(const char*path) { static char sub[MAX_PATH]; FileInfo fi; char *p = copystr(sub, MAX_PATH, path); if(*(p-1) != '/'){ *p++ = '/'; } copystr(p, MAX_PATH-(p-sub), "*"); if(!fi.find(sub)) return; *p=0; do { if(fi.info.cFileName[0] == '.' /*|| fi.info.cFileName[0]=='$'*/) continue; if(fi.is_dir()){ copystr(p, MAX_PATH-(p-sub), fi.info.cFileName); //printf("dir:%s,%#x\n", sub, fi.info.dwFileAttributes); scan_dir2(sub); } else{ scan_do(fi.info.cFileName, sub); } }while(fi.next()); //fi.close();这里会挂掉 } 调试了一下，发现进入一个子目录后，列举文件完成，返回后就会在fi.close()这句挂掉，让人抓狂，静下心来，仔细起了一下，想起最近添加了析构函数，多半应该是二次释放资源引起的，于是在close关闭后，将句柄手动复位，问题居然这这样解决了，由此猜想新操作系统可能在这里进行过修改，既使无意中二次释放也不会出错。然后从Java的一个字符串匹配算法，重写成C++版的，由于Java里没有指针，写出来的代码比c++的好看，我改写了一个下标版和一个指针版，实际测试，指针版的最快，这里给出指针版的： bool is_match4(const char*s, const char *p) { const char *ms = 0, *mp = 0;//记录匹配到'*'的位置 while(*s) { if(*p && (tolower(*s) == tolower(*p) || *p == '?')){//直接字符匹配或p里是'?' ++s; ++p; } else if(*p == '*'){//是'*'时，记录当前指针位置，并且先把j指针走一步 ms = s; mp = p++; } else if(mp){//j走一步后和i不匹配，则需要用'*'匹配掉当前i,所有i走一步，j退一步，再继续比较 s = ++ms; p = mp + 1; } else//没有完全匹配或者匹配不正确 return false; } while(*p)//s已经完全匹配完，但是j指针还没有走完的情况 if(*p++ != '*') return false; return true; } 配合上面的基础文件扫描框架，可以实现遍历任意目录指定文件： #include <stdio.h> #include <windows.h> #include <string.h> #include <ctype.h> class FileInfo { HANDLE hFind; typedef WIN32_FIND_DATAA finfo; typedef SYSTEMTIME ftime; public: finfo info; ftime time; ~FileInfo() { if(hFind!= INVALID_HANDLE_VALUE) FindClose(hFind);//不要二次调用，会引起死循环的 } //文件搜索 bool find(const char* findstr) { hFind = FindFirstFileA(findstr, &info); return hFind != INVALID_HANDLE_VALUE; } bool next() { return FindNextFileA(hFind, &info) > 0; } //文件时间转换 void create_time() { FileTimeToSystemTime(&info.ftCreationTime, &time); } void access_time() { FileTimeToSystemTime(&info.ftLastAccessTime, &time); } void write_time() { FileTimeToSystemTime(&info.ftLastWriteTime, &time); } bool is_dir() { return (info.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)>0; } }; int istrcmp(const char*s1, const char* s2) { char ch1, ch2; do{ ch1 = *s1++; ch2 = *s2++; if(ch1 >= 'A' && ch1 <= 'Z') ch1 +=32; if(ch2 >= 'A' && ch2 <= 'Z') ch2 +=32; }while(ch1 && ch2 && ch1 == ch2); return ch1 - ch2; } template<typename T> T* copystr(T* dst, int size, const T* s) { while(--size> 0 && *s) *dst++ = *s++; *dst = 0; return dst; } bool is_match4(const char*s, const char *p) { const char *ms = 0, *mp = 0;//记录匹配到'*'的位置 while(*s) { if(*p && (tolower(*s) == tolower(*p) || *p == '?')){//直接字符匹配或p里是'?' ++s; ++p; } else if(*p == '*'){//是'*'时，记录当前指针位置，并且先把j指针走一步 ms = s; mp = p++; } else if(mp){//j走一步后和i不匹配，则需要用'*'匹配掉当前i,所有i走一步，j退一步，再继续比较 s = ++ms; p = mp + 1; } else//没有完全匹配或者匹配不正确 return false; } while(*p)//s已经完全匹配完，但是j指针还没有走完的情况 if(*p++ != '*') return false; return true; } static int count = 0; void show_zip(const char* s) { const char *p = strrchr(s, '.'); if(p){ ++p; if(istrcmp(p, "rar")==0 || istrcmp(p, "zip") == 0){ puts(s); ++count; } } } static void(*scan_do)(const char* name, const char* path); void init_scan(void(*pfun)(const char*, const char*)) { scan_do = pfun; } void scan_dir2(const char*path) { static char sub[MAX_PATH]; FileInfo fi; char *p = copystr(sub, MAX_PATH, path); if(*(p-1) != '/'){ *p++ = '/'; } copystr(p, MAX_PATH-(p-sub), "*"); if(!fi.find(sub)) return; *p=0; do { if(fi.info.cFileName[0] == '.' /*|| fi.info.cFileName[0]=='$'*/) continue; if(fi.is_dir()){ copystr(p, MAX_PATH-(p-sub), fi.info.cFileName); //printf("dir:%s,%#x\n", sub, fi.info.dwFileAttributes); scan_dir2(sub); } else{ scan_do(fi.info.cFileName, sub); } }while(fi.next()); //fi.close(); } #include <time.h> void show_all(const char*s, const char* path) { puts(s); ++count; } void show_cpp(const char*s, const char* path) { const char *p = strrchr(s, '.'); if(p){ ++p; if(istrcmp(p, "c")==0 || istrcmp(p, "cpp") == 0){ puts(s); ++count; } } } static char *filters[] = {"*.txt"/*, "*.d2s", "*.mp3"*/}; void show_filter(const char*s, const char* path) { bool ret = false; for(int i = 0; i < 1; ++i){ if(is_match4(s, filters[i])){ ret = true; break; } } //if(is_match4(s, filters[0]) || is_match4(s, filters[1])) //ret = true; if(ret){ //printf("%s%s\n", path, s); char buf[MAX_PATH], *p;//这种写法比printf还快 p = copystr(buf, MAX_PATH, path); copystr(p, MAX_PATH-(p-buf), s); puts(buf); ++count; } } void chdir(const char*s) { SetCurrentDirectoryA(s); } template<typename T> void replace_all(T* s, T sch, T x) { for(;*s; ++s){ if(*s == sch) *s = x; } } void test_copy() { char d[20], s[] = "****", *p; p = copystr(d, 20, s); printf("%s, %d\n", d, p-d); } int main(int argc, char* argv[]) { double time; clock_t start = clock(); init_scan(show_filter); if(argc == 2){ //chdir(argv[1]); replace_all(argv[1], '\\', '/'); scan_dir2(argv[1]); } else scan_dir2("z:"); time = clock()-start; printf("scan use %.3f\n%d files\n", time/ 1000, count); }

数字转16进制字符串的五种写法与速度对比 #include<stdio.h> #include "random.h" static char tab[] = "0123456789abcdef"; char * toh(char *p, int val) { if(val > 0){ p = toh(p, val >> 4); *p++ = tab[val & 0xf]; } return p; } // 递归写法 void tohex1(char *buf, int val) { char *p = toh(buf, val); if(p == buf) *p++ = '0'; *p = 0; } // 位移写法 void tohex2(char *s, int val) { int i = 28; //for(; i>0 && ((val>>i) & 0xf) == 0; i-=4); for(; i >=0; i-=4) *s++ = tab[(val>>i) & 0xf]; *s = 0; } // 位移写法的改进版 void tohex3(char *s, int val) { for(int i = 3; i >=0; --i){ int t = val >>(i << 3); *s++ = tab[(t>>4)& 0xf]; *s++ = tab[t& 0xf]; } *s = 0; } // 常见写法，使用反转 void tohex4(char *s, int val) { char *p = s; do{ *s++ = tab[val&0xf]; }while(val >>=4); *s = 0; while(p<s-1){//反转 char t = *p; *p++ = *--s; *s = t; } } union Hex { char c[4]; int val; }; // 联合直接转换法 void tohex5(char *s, int val) { Hex h; h.val = val; *s++ = tab[(h.c[3]>>4)&0xf]; *s++ = tab[h.c[3] & 0xf]; *s++ = tab[(h.c[2]>>4)&0xf]; *s++ = tab[h.c[2] & 0xf]; *s++ = tab[(h.c[1]>>4)&0xf]; *s++ = tab[h.c[1] & 0xf]; *s++ = tab[(h.c[0]>>4)&0xf]; *s++ = tab[h.c[0] & 0xf]; *s = 0; } //测试代码： int main() { char buf[5][10]; int i, val = 0x1234bcde, N = 10000000; double time1; Clock.start(); for(i = 0; i < N; ++i) tohex1(buf[0], val); time1=Clock.elapse(); printf("hex1 :%.3f\n", time1); Clock.start(); for(i = 0; i < N; ++i) tohex2(buf[1], val); time1=Clock.elapse(); printf("hex2 :%.3f\n", time1); Clock.start(); for(i = 0; i < N; ++i) tohex3(buf[2], val); time1=Clock.elapse(); printf("hex3 :%.3f\n", time1); Clock.start(); for(i = 0; i < N; ++i) tohex4(buf[3], val); time1=Clock.elapse(); printf("hex4 :%.3f\n", time1); Clock.start(); for(i = 0; i < N; ++i) tohex5(buf[4], val); time1=Clock.elapse(); printf("hex5 :%.3f\n", time1); puts(buf[0]); puts(buf[1]); puts(buf[2]); puts(buf[3]); puts(buf[4]); } 运行结果：由此可见，递归写法最慢，有趣的是算法3, 循环次数减半，用时也会减半，算法5由于没有循环，所以速度居然达到最快，这种将循环展开为直接代码输出，可有效提升代码运行速度，这种方式在阅读mpg123的原代码也经看到。算法2-5适合不同的应用场景，记录于此，仅供参考。

谈二分查找实现与改进翻开C数据结构一书，第二章有一个经典的二分查找示例程序： typedef int ElementType; #define NotFound (-1) /* START: fig2_9.txt */ int BinarySearch( const ElementType A[ ], ElementType X, int N ) { int Low, Mid, High; /* 1*/ Low = 0; High = N - 1; /* 2*/ while( Low <= High ) { /* 3*/ Mid = ( Low + High ) / 2; /* 4*/ if( A[ Mid ] < X ) /* 5*/ Low = Mid + 1; else /* 6*/ if( A[ Mid ] > X ) /* 7*/ High = Mid - 1; else /* 8*/ return Mid; /* Found */ } /* 9*/ return NotFound; /* NotFound is defined as -1 */ } 提一个问题，如果有一个10大小的数组顺序存有0-9个整数，你觉得用上面二分查找找到这10个数，分别用了多少次循环？比如查找0元素和9元素?，我们来写一段程序进行验证： #include <stdio.h> int count = 0; //循环次数 template<typename T> int binary_find(T * A, size_t N, T x) { int low = 0, mid, high= N-1; while(low <= high) { mid = (low + high) >> 1; if(A[mid] == x) return mid; if(A[mid] < x) low = mid + 1; else high = mid - 1; ++count; } return -1; } //测试程序 int main() { int i, size = 10;//6, 9, 13, 16 19 23(5, 8, 11, 15, 18, 21) //10到1000万数据最大查找次数 int *arr = new int[size]; for(i = 0; i < size; ++i) arr[i] = i; int x , max = 0; for(i = 0; i < size; ++i){ x = binary_find(arr, size, i); //x = bfind(arr, size, i); if(x == -1) puts("not found"); else printf("found %d: count %d\n", arr[x], count); if(max < count) max = count; count = 0; } printf("max =%d\n", max); delete[] arr; } ---------------------------------------- 运行结果： found 0: count 2 found 1: count 1 found 2: count 2 found 3: count 3 found 4: count 0 found 5: count 2 found 6: count 3 found 7: count 1 found 8: count 2 found 9: count 3 max =3 看到了吧，查找某元素循环次数最大是3, 我们再看看数组为100时的情况，将size改成100; found 0: count 5 found 1: count 6 found 2: count 4 found 3: count 5 found 4: count 6 found 5: count 3 found 6: count 5 found 7: count 6 found 8: count 4 found 9: count 5 中间不显示了 .......................... found 90: count 4 found 91: count 5 found 92: count 6 found 93: count 3 found 94: count 5 found 95: count 6 found 96: count 4 found 97: count 6 found 98: count 5 found 99: count 6 max =6 请按任意键继续. . . 嘿嘿，不难看出一头一尾元素用二分法居然需要5-6次循环，有什么改进办法？其实研究过快速排序的三分中值摸查模块，结合二分查找法的代码，不难看出，每次二分的时候，都有三个引索值，low, mid, high，只需要对这三个索引值在数组的位置，再进行判断一次就可以减少循环次数，改进代码如下： int bfind(int A[], int N, int x) { int low = 0, mid, high = N-1; while(low <= high) { mid = (low + high) >> 1; if(A[mid] == x) return mid; if(A[low] == x) return low;//添加 if(A[high] == x) return high;//添加 if(A[mid] < x){ --high;//添加 low = mid +1; } else{ ++low;//添加 high = mid -1; } ++count; } return -1; } 只添加了四行代码，再来看看为数组为10效果： found 0: count 0 found 1: count 1 found 2: count 1 found 3: count 1 found 4: count 0 found 5: count 1 found 6: count 1 found 7: count 2 found 8: count 1 found 9: count 0 max =2 请按任意键继续. . . 不仅是查找次数少了，而且各元素找到所用的次数也减少了，为100的情况就不重复帖了，感兴趣的自行验证。总结，代码在思考中进化，玩程序有趣的地方。

伪随机算法实现各语言实现示例。无聊，又研究了一下几种排序算法，在测速的时候，发现自己忘记了一个重要的问题，在某天看到有人在帖吧提到生成随机数只计数到32768就停止了，顺手查了一下C库函数的实现，才发现提供的只是0~32767之间的随机数，我去，图省力，直接用标准库去测试算法，实在是，以前自己弄过随机数C++类啊，居然没有拿来用，失算。重新查了一下一些随机数算法，挑了三个流行的常用算法，重新实现伪随机数生成算法库。通过msvc和mingw编译。 C示例： //random.h #ifndef __RANDOM_HEADER__ #define __RANDOM_HEADER__ #include <time.h> #ifdef _MSC_VER #if _MSC_VER < 1600 typedef unsigned int uint32_t; typedef unsigned char uint8_t; typedef unsigned __int64 uint64_t; typedef __int64 int64_t; #else #include <stdint.h> #endif #else #include <stdint.h> #endif /** *三种随机数算法函数库，支持long long随机数生成 */ struct tagRand { void (*seed)(void);// 设置算法种子 int (*next)(void);// 计算得到一个伪随机数 int (*range)(int low, int hight);//计算[low, high]之间的伪随机数 short (*next_short)(void);//返回0-32767之间的伪随机数 double (*next_double)(void);//随机浮点数 float (*next_float)(void);//随机浮点数 int64_t (*next64)(void);// 64位随机数，正负问题自行处理 void (*set_type)(int type); // 设置使用哪种随机算法type=0~2(lcm, xorshift32, well算法) }; extern struct tagRand Rand; #endif //random.c #include "random.h" static uint32_t seed[16]; static uint64_t seed64; static uint32_t get_seed(){ return (unsigned)time(0); } // C数据结构一书中线性同余法 static int lcm() { //A(48271), M(2147483647), Q=M/A(44488), R=M%A(3399) //公式:A*(seed % Q) - R*(seed/Q) 不使用宏，避免宏替换影响int A[]这样的参数 const int A = 48271, Q=44488, R = 3399; *seed = A * (*seed % Q) - R * (*seed / Q); return *seed; } // xorshift 32位算法, 另一种写法，周期较长 static int xor128() { uint32_t t=*seed^(*seed<<11); *seed=seed[1]; seed[1]=seed[2]; seed[2]=seed[3]; return seed[3]=seed[3]^(seed[3]>>19)^t^(t>>8) ; } // xorshift 32位算法 static int xorshift32() { uint32_t x = *seed; x ^= x << 13; x ^= x >> 17; x ^= x << 5; *seed = x; return x; } // well算法 static int well512() { uint32_t a,b,c,d; static int index = 0; a = seed[index]; c = seed[(index+13)&15]; b = a^c^(a<<16)^(c<<15); c = seed[(index+9)&15]; c ^= (c>>11); a = seed[index] = b^c; d = a ^((a<<5)& 0xDA442D20); index = (index + 15)&15; a = seed[index]; seed[index] = a^b^d^(a<<2)^(b<<18)^(c<<28); return seed[index]; } // 默认使用算法 static int (*rands)() = xorshift32; //设置使用哪种随机算法 static void set_algorithm(int type) { switch(type) { case 0: rands = lcm; case 1: rands = xorshift32; case 2: rands = well512; } } // 计算得到一个伪随机数 static int next(void) { return rands() & 0x7fffffff; } //计算[low, high]之间的伪随机数 static int range_next(int low, int high) { return low + next() % (high+1-low); } //返回0-32767之间的伪随机数 static short next_short(void) { return next() >> 16; } //随机浮点数 static double next_double(void) { return (double)next() / 0x7fffffff; } //随机浮点数 static float next_float(void) { return (float)next() / 0x7fffffff; } // 64位随机数，正负问题自行处理 static int64_t next64(void) { uint64_t x = seed64; x ^= x >> 12; x ^= x << 25; x ^= x >> 27; seed64 = x; return x * 0x2545F4914F6CDD1D; } // 设置算法种子 static void seed_rand(void) { int i; for( i = 0; i <8; ++i){ seed[i] = (get_seed() << (i+1))-1; seed[i+1] = get_seed()>>(i+1); } seed64 = *seed; seed64 = (seed64 << 32) | seed[next() & 15]; } // 初始化结构体 struct tagRand Rand = { seed_rand, next, range_next, next_short, next_double, next_float, next64, set_algorithm }; //测试程序： #include <stdio.h> #include "random.h" int main() { int i, size = 15; Rand.set_type(2); Rand.seed(); //随机整数测试 for(i = 0; i < size; ++i) printf("%d ", Rand.next()%10); putchar('\n'); //范围测试 for(i = 0; i < size; ++i) printf("%c ", Rand.range('A', 'Z')); putchar('\n'); for(i = 0; i < size; ++i) printf("%c ", Rand.range('a', 'z')); putchar('\n'); //浮点数测试 for(i = 0 ; i < 10; ++i) printf("%f\n", Rand.next_float()); //64位数测试 for(i = 0; i < 3; ++i) printf("%I64d\n", Rand.next64()); //for(i=0; i < size; ++i) //printf("%c ", rand_char()); //putchar('\n'); printf("sizeof rand:%d\n", sizeof(Rand)); } 运行结果： 0 7 1 7 0 1 9 2 5 6 2 3 1 2 9 E A P S W U T S V B S U Z W X t f u n d c m s c z e y r f s 0.389461 0.888160 0.725454 0.885750 0.817395 0.770790 0.688366 0.003831 0.185819 0.965431 135809110265936984 -2632048472102704751 6521441842461015894 sizeof rand:32 请按任意键继续. . .

散列表分离链结法C++简单实现与vs2015的一个bug 前段时间整理了一下项目代码，恢复了一下系统，发现vs2015的一个bug，以前写的对话框类在vs2015下编译失败，由于对话框需要theme支持，调用了INITCOMMONCONTROLSEX api，不论是否指定sdk里的comctl32.lib，系统始终无法链结上这个函数，这个真的很无语，同样的代码在其它版本或是mingw里完全没有问题。想起win32++项目是支持vs2015的，里面的对话框可以显示系统样式，查看了一下源代码，原来作者使用的是动态dll里直接调用这个函数来规避这个问题，晕。由于对话框需要一个数据结构来管理窗口对象，所以重新写了一下HashMap散列表数据结构：采用分离链结法+素数实现，使用特殊表头使内存占用小。自动计算素数以重新散列元素，让元素均匀分布，可使元素查找线性时间进一步缩短。 // itypes.h #ifndef __INTYPE_HEADER__ #define __INTYPE_HEADER__ #ifdef _MSC_VER #if _MSC_VER < 1600 typedef unsigned int uint32_t; typedef unsigned char uint8_t; typedef unsigned __int64 uint64_t; typedef __int64 int64_t; #else #include <stdint.h> #endif #else #include <stdint.h> #endif #endif #include <stdio.h> #include "itypes.h" #include <stdlib.h> #include <string> typedef unsigned int size_t; struct tagPrime { // 判断某个数是否是素数 bool isPrime(uint32_t n) { if(n < 2) return false; for(unsigned i = 2; i*i <= n; ++i) { if(n % i == 0) return false; } return true; } // 获取某个数后第一个素数 uint32_t next(uint32_t n) { for(++n; !isPrime(n); ++n) ; return n; } }Prime; // key-value结点 template<typename KeyType, typename ValType> struct TNode { KeyType key; ValType value; TNode* next; }; // 头结点 template<class T> struct THead { THead():next(0){} T* next; }; // 散列表，采用分离链结法+素数实现，使用特殊表头使内存占用小。 // 自动计算素数以重新散列元素，让元素均匀分布，可使元素查找线性时间进一步缩短。 template<class Key, class Value> class HashMap { typedef TNode<Key, Value> Node; typedef THead<Node> Head; public: HashMap() :table(0), tab_size(0), node_size(0) { tab_size = Prime.next(4); table = new Head[tab_size]; } ~HashMap() { clear(); delete[] table; } unsigned hash(const std::string& key) { return hash(key.c_str()); } // C数据结构一书中提出的一种字符串hash计算公式 uint32_t hash(const char* key) { uint32_t hash_val = 0; while(*key) hash_val = (hash_val << 5) + *key++; return hash_val; } // 普通整数，没啥可说的，主要是配合程序需要 uint32_t hash(uint32_t key) { return key; } // 清空hash表元素 void clear() { for(size_t i = 0; i < tab_size; ++i) { Node * node =table[i].next; while(node){ Node* tmp = node->next; delete node; node = tmp; } } node_size = 0; } // 根据关键字添加或修改键值，不用于查找不存在的键。 Value& operator[](const Key& key) { size_t index = hash(key) % tab_size; Node* node = table[index].next; while(node) { if(node->key == key){ return node->value; } node = node->next; } //没有找到则添加一个新key结点 node = new Node; node->next = table[index].next; table[index].next = node; node->key = key; ++node_size; //增加元素计数 return node->value; } // 重新散列原表中的元素（当达到装填因子load_factor时） void rehash() { Head* tmp = table; size_t tsize = tab_size; if(!need_load()) return;//没有达到装填因子，则返回 tab_size = Prime.next(node_size);//计算合适的素数 table = new Head[tab_size]; for(size_t i = 0; i < tsize; ++i) { Node* node = tmp[i].next; while(node){ Node* pn = node->next; //重新散列到新表 size_t index = hash(node->key) % tab_size; node->next = table[ index ].next; table[ index ].next = node; node = pn; } } delete[] tmp; } // 查询键值，不会增加元素 bool find(const Key& key, Value& val) { size_t index = hash(key) % tab_size; Node* node = table[ index ].next; for(;node; node = node->next) { if(node->key == key){ val = node->value; return true; } } return false; } // 删除某个元素 void remove(const Key& key) { Node head; //使用临时表头删除无表头链表 size_t index = hash(key) % tab_size; head.next = table[ index ].next; Node* prev = &head; Node* node = prev->next; while(node) { if(node->key == key){ prev->next = node->next; delete node; --node_size; table[ index ].next = head.next; return; } node = node->next; prev = prev->next; } } // 返回元素总个数 inline size_t size() const { return node_size; } void test1() { if((node_size * 10 / tab_size ) >= 9 ) puts("over load_factor"); else puts("less load_factor"); } void test2() { for(size_t i = 0; i < tab_size; ++i) printf("%s ", table[i].next == NULL ? "null" : "node"); putchar('\n'); } private: bool need_load() { return (node_size * 10 / tab_size) > load_factor; } private: Head* table; size_t tab_size; size_tnode_size; static const int load_factor = 9; //装填因子(1.0 = 10, 0.9 = 9) }; // 测试代码： int main() { HashMap<std::string, int> map; printf("size of HashMap:%u\n", sizeof(map)); //添加一些键值 map["C"] = 1; map["C++"] = 2; map["C#"] = 3; map["Java"] = 4; printf("now map size :%u\n", map.size()); map.test2(); int x; if(map.find("Java", x)) printf("found key=Java, value=%d\n", x); else puts("not found"); //修改键值 map["Java"] = 6; puts("modify data"); if(map.find("Java", x)) printf("found key=Java, value=%d\n", x); // 删除键值 map.remove("Java"); printf("now map size :%u\n", map.size()); if(map.find("Java", x)) printf("found key, value=%d\n", x); else puts("not found"); } 运行结果： size of HashMap:12 now map size :4 null null node node node found key=Java, value=4 modify data found key=Java, value=6 now map size :3 not found 请按任意键继续. . . 总结：这个基本上算是内存使用量最少的实现了，链表没有表头，所以删除时，使用一个临时表头的算法来减少代码量。后续思考：模运算很费CPU时间，另一个办法是产生2的N次方个表头，计算hash值的时候就可以不用模运算了，这个数据结构还有很多实现方法，

Hash五个算法C#实现一段时间没有玩C#了，正好机器里装了VS2015，所以又将Hash项目用C#实现了，在重写的过程中，发现了一个问题，就是取中文的hash值，大家都知道C#里的字符串string是unicode编码的，所以如果用c/c++的方法去计算字符串的Hash值，就会发现英文的话没有问题，中文的话运行值和C/ C++不同，然后查了一下C#的System.Text中关于字符编码的文章，最后用一个语句就解决了这个问题，如下： public override bool update(string s) { byte[] arr = Encoding.Default.GetBytes(s);//将utf-16转换成系统默认编码 return update(arr, arr.Length); } public override bool update(byte[] array, int size) { if (hash.reset) setup(); for (int i = 0; i < size; ++i) { hash.values[0] += array[i]; sum += hash.values[0]; } return true; } 测试主程序： using System; using System.IO; namespace MyHash { class Program { static void test_hash(IHash ih, string s) { ih.update(s); ih.final(); } static void Main(string[] args) { string s = "中文"; test_hash(HashFactory.Create(HashFactory.HashType.Crc32), s); test_hash(HashFactory.Create(HashFactory.HashType.Adler32), s); test_hash(HashFactory.Create(HashFactory.HashType.MD5), s); test_hash(HashFactory.Create(HashFactory.HashType.Sha1), s); test_hash(HashFactory.Create(HashFactory.HashType.Sha256), s); try { FileStream fs = new FileStream("..\\..\\hashdata.cs", FileMode.Open, FileAccess.Read); byte[] buf = new byte[1024]; int reads; IHash sha = HashFactory.Create(HashFactory.HashType.MD5); Console.WriteLine("Test File Hash:"); while ( (reads = fs.Read(buf, 0, 1024)) > 0) { sha.update(buf, reads); } sha.final(); fs.Close(); } catch (Exception ex) { Console.WriteLine("error :" + ex.Message); } } } /// /// Hash接口类 /// abstract class IHash { public abstract bool update(string s); public abstract bool update(byte[] array, int size); public abstract void final(); } /// /// Hash工厂类 /// class HashFactory { public enum HashType { Crc32, Adler32, MD5, Sha1, Sha256 } public static IHash Create(HashType type) { switch(type) { case HashType.Crc32: return new Crc32(); case HashType.Adler32: return new Adler32(); case HashType.MD5: return new Md5(); case HashType.Sha1: return new Sha1(); case HashType.Sha256: return new Sha256(); default: return null; } } }// end class } 运行结果：总结：本项目采用多态类和类组合方式实现，既可以使用接口调用，也可以使用单独类，自己也从这个项目中理清了各个类之间的关系，这是用纯面向对象语言的一种思维优势。全部代码：

五个Hash算法库C语言版无事，将前段时间写的C++版本的Hash算法库，改写成C语言版本，拿着函数指针大刀，一阵狂野的修改，生成的Dll比用C++的少2K，12K，然后又测试了tcc编译成dll，让人失望的是tcc生成的dll是15K, gcc生成的还要大，大概32K左右。。编译备注：编译时需要定义HASH_EXPORT_DLL宏。测试程序： #include <stdio.h> #include <string.h> #include "hash.h" int main() { const char* str = "hellovfp"; const char* types[] = {"crc32", "sha1", "md5", "adler32", "sha256"}; Hash hash; IHash ih; int i; for(i = 0; i < 5; ++i) { ih = hash_create(&hash, i); ih.update(&hash, str, strlen(str)); ih.final(&hash); hash_release(&hash); printf("%s:%s\n", types[i], hash.hex_str); } return 0; } 运行结果和软件对比图：好了，终于又搞定了，下一步，可以开始写国际版的Luo奔监视器了，嘿嘿。

简单的ios控制台输入输出库 /** 本字符串功能为支持unicode Windows API软件类库而写，提供了一些std::string没有的常用功能，如分割，格式化等。大部分功能和标准字符串功能相同，使用差不多，代码短小，运行速度快。格式化代码release编译运行速度比比C库sprintf函数要快4倍左右, 且没有写入出界问题没有提供replace功能，意义不大，可以用删除加插入功能实现。 format支持格式： %c: 字符 %s: 字符串(仅支持右对齐) %d: 有符号整数 %u: 无符号整数 %o: 八进制 %b: 二进制 %x[X]: 16进制,X输出大写 %p: 32位指针 %lp: 64位指针 %f: 浮点数 %g: 短式浮点表示 %lld, llu, llo, llb, llx: 64位整数，最后一位和上面相同不支持: %e, 科学记数法可以调用ecvt,fcvt,gcvt之类的函数调用实现，感兴趣的可以自行修改源代码添加支持。 */ // 更新记录： // 1.重整私有函数，从接口中分离。添加tohex,swap,equal,strlen等模版函数实现 // 2.修改构造函数为explicit，避免单参数隐式转换效率问题，同时添加因修改缺少的函数。 // 3.修改了默认构造函数分配大小，修正了重分配函数功能问题, 非必要不重分配。 // 4.修改了左查找代码，修正了右查找代码错误和删除函数输入负数的错误 // 5.添加了反转,添加和附值函数, 改进了插入代码。 // 6.改进了format代码，进一步提升执行效率，同时添加了64位数支持。 // 7.增加了大小写转换函数

控制台文本输出速度对比，结果让人无语。 //测试代码： #define CNT 3000 void test_console() { String s(L"I love my syster forevey!!!\n"); std::string s1("I love my syster forevey!!!\n"); double time1, time2, time3; Clocker clock; for(int i = 0; i < CNT; ++i) puts("I love my syster forevey!!!"); time1 = clock.elapse(); clock.restart(); for(int i = 0; i < CNT; ++i) console << s; time2 = clock.elapse(); clock.restart(); for(int i = 0; i < CNT; ++i) std::cout << s1; time3 = clock.elapse(); printf("puts use %.3f\n", time1); printf("write use %.3f\n", time2); printf("std::cout use %.3f\n", time3); } 测试C库的puts，和win32 API writeConsole，以及C++ 的cout输出同一个字符串3000次，在i5-5200(2.20GHZ) 本本+VS2010的测试结果：如上图所示，结果是API的速度快得让人无语，最慢的C++的cout，打算重写控制台输出库了。

c++生成独立dll的细节研究由于需要把自己写的三个hash算法类库化，想生成Dll以供后期程序调用，经过一翻测试，发现和C生成DLL还是有差别，静态链结没有问题，一但想动态加载dll的时候，就会发现程序崩溃，经过调试，发现能取得dll的函数，返回的接口也是正常的，调用算法方法时正常，但在调用toSring方法时，就挂了，显示std::string的 Alloc类调用失败，我去，难道不能动态加载，只能静态用？通过使用Depend软件查看dll的关联性，还发现一个错误，提示关联的msvcp90.dll丢失，晕。经过一翻思考后，对源代码进行了小范围的改动，将使用STL标准库的方法去掉，用结构体数组进行了替代，去掉了std::string的依赖，这里还有一个思考，或许可以用自己写的string类进行替换，但没有进行实验了。最后将用户使用的接口从hash_comm.h中提取出来，经过测试静态和动态加载，终于成功了！ // hash.h #ifndef __HASH_HEADER__ #define __HASH_HEADER__ #ifdef EXPORT_DLL #define DLL_API __declspec(dllexport) #else #define DLL_API __declspec(dllimport) #endif #define HASH_MAX 5 #define HASHS_MAX 41 // 算法类型ID #define HASH_CRC32 0 #define HASH_SHA1 1 #define HASH_MD5 2 struct Hresult { unsigned intvalue[HASH_MAX]; unsigned intlen; charhash[HASHS_MAX]; }; // hash算法接口类 class IHash { public: virtual ~IHash(){} virtual bool update(const char*, size_t)=0; virtual void final()=0; virtual void value(Hresult&)=0; }; // 接口工厂函数 extern "C" { DLL_API IHash* hash_create(int hash_type); DLL_API void hash_release(IHash *); } #endif // interface.cpp #include "crc32.h" #include "md5.h" #include "sha1.h" IHash* hash_create(int hash_type) { switch(hash_type) { case 0:return new Crc32; case 1:return new Sha1; case 2:return new Md5; } return 0; } void hash_release(IHash *ih) { if(ih) delete ih; } 生成的hash.dll只有12K，用Depend查看DLL关联，只和msvcr.dll和kernel.dll相关了。总结：这个经验对于纯算法的C++类有用，不能使用C++的STL类库，接口工厂函数的实现需要单独放入一个CPP文件中，且使用extern "C"进行申明。这样生成的dll既可以享用C++多态类使用的优点，又可以保持动态加载调用这个Dll特性，各个算法实现在dll中完全隐藏，用户并不可见，非常爽。吐槽一下：stl的string真的是慢，想要快速的话，慎用。push_back方法，比直接用数组输出慢1000倍，比直接实现string慢23倍以上，十万次执行的所用时间1.089s，直接实现所用时间是0.047s，过度使用设计模式的问题。

md5算法C++实现，第二次重写，137行超短代码主要改进：相比于源代码的221行，进一步缩短代码，没有了源代码大理的强制转换，接口统一成上一篇sha-1样式，去掉了大理的宏定义，所有代码不调用C库函数，直接算法版实现。纯算法版本。 //md5.h #include <string> #ifdef _MSC_VER #if _MSC_VER < 1600 typedef unsigned int uint32_t; typedef unsigned char uint8_t; typedef unsigned __int64 uint64_t; #else #include <stdint.h> #endif #else #include <stdint.h> #endif /** * 功能：md5散列算法C++实现 * 作者：hellovfp * 时间：2019.10.16 * 最后修改：2019.10.17 */ class Md5 { public: Md5(); ~Md5(); public: bool update(const char* data, size_t len); void final(); std::string toString(); private: void _init(); void _fill_zero(int n); void _hash(); private: uint64_t _size;//数据总位数 uint32_t _hashs[4];//hash结果 uint8_t _block[64]; short _index;//块索引 bool _reset;//初始化标志 }; // md5.cpp #include "md5.h" #define SHA1_MAX 2305843009213693952ULL typedef uint32_t (*calc)(uint32_t x, uint32_t y, uint32_t z); static int R[4][4] = { { 7, 12, 17, 22 },{ 5, 9, 14, 20 }, { 4, 11, 16, 23 },{ 6, 10, 15, 21 }}; //循环次数表 static int N[4][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, { 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12 }, { 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2 }, { 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 } }; //x数组索引表 static int AC[4][16] = { {0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821}, // Round 2 {0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x2441453, 0xd8a1e681, 0xe7d3fbc8, 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a}, // Round 3 {0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665}, // Round 4 {0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391} };// 四轮运算表 // F, G, H and I are basic MD5 functions. staticuint32_t F(uint32_t x, uint32_t y, uint32_t z) { return (x & y) | ((~x) & z); } staticuint32_t G(uint32_t x, uint32_t y, uint32_t z) { return (x & z) | (y & (~z)); } staticuint32_t H(uint32_t x, uint32_t y, uint32_t z) { return x ^ y ^ z; } staticuint32_t I(uint32_t x, uint32_t y, uint32_t z) { return y ^ (x | (~z)); } // ROTATE_LEFT rotates x left n bits. // 公有函数，是否用这个名字？ static uint32_t rotate_left(uint32_t x, int n) { return (x << n) | (x >> (32-n)); } // 函数指针数组 static calc calcs[4] = {F, G, H, I}; // hash 计算公式 static void FFF(int n, uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t r, uint32_t ac){ a = b + rotate_left((a + calcs[n](b, c, d) + x + ac), r); } // 或许可以先调用tohex，然后每二位交换 static void _tohex2(uint32_t val, std::string& s) { char stuff[] = "0123456789abcdef"; //16进制表 for(int i = 0; i <32; i+=8){ s.push_back( stuff[(((val>>i)& 0xff) >> 4) & 0xf]); s.push_back( stuff[(((val>>i)& 0xff) >> 0) & 0xf]); } } Md5::Md5() { _init(); } Md5::~Md5() { } bool Md5::update(const char *data, size_t len) { if(_reset) {_init();} while(len--) { _block[_index++] = *data++; //拷贝数据 if(_size > SHA1_MAX) return false; //超出2^64计数范围 _size++; //计数器+1; if(_index == 64){//一个块填充完成则进行hash计算 _hash(); } } return true; } // 这份代码大至相同。 void Md5::final() { if(_reset) return; _block[_index++] = 0x80;//添加结束府 if(_index > 55){//如果后8位不够填充bit总数，则填0 _fill_zero(64); _hash(); } _fill_zero(56); //最后8字节前填充0 _size <<= 3;//计算总bit数 for(int i = 0; i <=56; i+=8)//最后8字节填充little endian总bit数 _block[_index++] = (uint8_t)(_size >> i); _hash();//结束计算 _reset = true; } std::string Md5::toString() { std::string s; for(int i = 0; i < 4; i++) _tohex2(_hashs[i], s); return s; } void Md5::_init() { _hashs[0] = 0x67452301; _hashs[1] = 0xefcdab89; _hashs[2] = 0x98badcfe; _hashs[3] = 0x10325476; _size = 0; _index = 0; _reset = false; } void Md5::_fill_zero(int n) { while(_index < n) _block[_index++]=0; } void Md5::_hash() { uint32_t a = _hashs[0], b = _hashs[1], c = _hashs[2], d = _hashs[3], x[16]; int i, j; // 转换成word for (i=0, j=0; j<64; i++, j+=4) x[i] = _block[j] | (_block[j+1] << 8) | (_block[j+2] << 16) | (_block[j+3] << 24); // 64次hash计算 for (i=0; i<4; i++) { for (j=0; j<4; j++) { FFF (i, a, b, c, d, x[N[i][(j<<2)+0]], R[i][0], AC[i][(j<<2)+0]); /* 1 */ FFF (i, d, a, b, c, x[N[i][(j<<2)+1]], R[i][1], AC[i][(j<<2)+1]); /* 2 */ FFF (i, c, d, a, b, x[N[i][(j<<2)+2]], R[i][2], AC[i][(j<<2)+2]); /* 3 */ FFF (i, b, c, d, a, x[N[i][(j<<2)+3]], R[i][3], AC[i][(j<<2)+3]); /* 4 */ } } _index = 0; _hashs[0] += a; _hashs[1] += b; _hashs[2] += c; _hashs[3] += d; } //测试文件main.cpp #include "md5.h" int main() { //697ce72239436d59c21b77652cd61637 Md5 md; md.update("hellovfp", 8); md.final(); printf("\nmd5 :%s\n", md.toString().c_str()); FILE *file; file = fopen(__FILE__, "rb"); if(file) { char buf[512]; while(!feof(file)) { size_t size = fread(buf, 1, 512, file); md.update(buf, size); } fclose(file); md.final(); printf("file md5:%s\n", md.toString().c_str()); return 0; } puts("open file error!"); return 1; }

md5算法C++实现，第二次重写，137行超短代码主要改进：相比于源代码的221行，进一步缩短代码，没有了源代码大理的强制转换，接口统一成上一篇 sha-1样式，去掉了大理的宏定义，所有代码不调用C库函数，直接算法版实现。纯算法版本。 //md5.h #include <string> #ifdef _MSC_VER #if _MSC_VER < 1600 typedef unsigned int uint32_t; typedef unsigned char uint8_t; typedef unsigned __int64 uint64_t; #else #include <stdint.h> #endif #else #include <stdint.h> #endif /** * 功能：md5散列算法C++实现 * 作者：hellovfp * 时间：2019.10.16 * 最后修改：2019.10.17 */ class Md5 { public: Md5(); ~Md5(); public: bool update(const char* data, size_t len); void final(); std::string toString(); private: void _init(); void _fill_zero(int n); void _hash(); private: uint64_t _size;//数据总位数 uint32_t _hashs[4];//hash结果 uint8_t _block[64]; short _index;//块索引 bool _reset;//初始化标志 }; // md5.cpp #include "md5.h" #define SHA1_MAX 2305843009213693952ULL typedef uint32_t (*calc)(uint32_t x, uint32_t y, uint32_t z); static int R[4][4] = { { 7, 12, 17, 22 },{ 5, 9, 14, 20 }, { 4, 11, 16, 23 },{ 6, 10, 15, 21 }}; //循环次数表 static int N[4][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, { 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12 }, { 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2 }, { 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9 } }; //x数组索引表 static int AC[4][16] = { {0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821}, // Round 2 {0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x2441453, 0xd8a1e681, 0xe7d3fbc8, 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a}, // Round 3 {0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, 0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665}, // Round 4 {0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391} };// 四轮运算表 // F, G, H and I are basic MD5 functions. staticuint32_t F(uint32_t x, uint32_t y, uint32_t z) { return (x & y) | ((~x) & z); } staticuint32_t G(uint32_t x, uint32_t y, uint32_t z) { return (x & z) | (y & (~z)); } staticuint32_t H(uint32_t x, uint32_t y, uint32_t z) { return x ^ y ^ z; } staticuint32_t I(uint32_t x, uint32_t y, uint32_t z) { return y ^ (x | (~z)); } // ROTATE_LEFT rotates x left n bits. // 公有函数，是否用这个名字？ static uint32_t rotate_left(uint32_t x, int n) { return (x << n) | (x >> (32-n)); } // 函数指针数组 static calc calcs[4] = {F, G, H, I}; // hash 计算公式 static void FFF(int n, uint32_t &a, uint32_t b, uint32_t c, uint32_t d, uint32_t x, uint32_t r, uint32_t ac){ a = b + rotate_left((a + calcs[n](b, c, d) + x + ac), r); } // 或许可以先调用tohex，然后每二位交换 static void _tohex2(uint32_t val, std::string& s) { char stuff[] = "0123456789abcdef"; //16进制表 for(int i = 0; i <32; i+=8){ s.push_back( stuff[(((val>>i)& 0xff) >> 4) & 0xf]); s.push_back( stuff[(((val>>i)& 0xff) >> 0) & 0xf]); } } Md5::Md5() { _init(); } Md5::~Md5() { } bool Md5::update(const char *data, size_t len) { if(_reset) {_init();} while(len--) { _block[_index++] = *data++; //拷贝数据 if(_size > SHA1_MAX) return false; //超出2^64计数范围 _size++; //计数器+1; if(_index == 64){//一个块填充完成则进行hash计算 _hash(); } } return true; } // 这份代码大至相同。 void Md5::final() { if(_reset) return; _block[_index++] = 0x80;//添加结束府 if(_index > 55){//如果后8位不够填充bit总数，则填0 _fill_zero(64); _hash(); } _fill_zero(56); //最后8字节前填充0 _size <<= 3;//计算总bit数 for(int i = 0; i <=56; i+=8)//最后8字节填充little endian总bit数 _block[_index++] = (uint8_t)(_size >> i); _hash();//结束计算 _reset = true; } std::string Md5::toString() { std::string s; for(int i = 0; i < 4; i++) _tohex2(_hashs[i], s); return s; } void Md5::_init() { _hashs[0] = 0x67452301; _hashs[1] = 0xefcdab89; _hashs[2] = 0x98badcfe; _hashs[3] = 0x10325476; _size = 0; _index = 0; _reset = false; } void Md5::_fill_zero(int n) { while(_index < n) _block[_index++]=0; } void Md5::_hash() { uint32_t a = _hashs[0], b = _hashs[1], c = _hashs[2], d = _hashs[3], x[16]; int i, j; // 转换成word for (i=0, j=0; j<64; i++, j+=4) x[i] = _block[j] | (_block[j+1] << 8) | (_block[j+2] << 16) | (_block[j+3] << 24); // 64次hash计算 for (i=0; i<4; i++) { for (j=0; j<4; j++) { FFF (i, a, b, c, d, x[N[i][(j<<2)+0]], R[i][0], AC[i][(j<<2)+0]); /* 1 */ FFF (i, d, a, b, c, x[N[i][(j<<2)+1]], R[i][1], AC[i][(j<<2)+1]); /* 2 */ FFF (i, c, d, a, b, x[N[i][(j<<2)+2]], R[i][2], AC[i][(j<<2)+2]); /* 3 */ FFF (i, b, c, d, a, x[N[i][(j<<2)+3]], R[i][3], AC[i][(j<<2)+3]); /* 4 */ } } _index = 0; _hashs[0] += a; _hashs[1] += b; _hashs[2] += c; _hashs[3] += d; } //测试文件main.cpp #include "md5.h" int main() { //697ce72239436d59c21b77652cd61637 Md5 md; md.update("hellovfp", 8); md.final(); printf("\nmd5 :%s\n", md.toString().c_str()); FILE *file; file = fopen(__FILE__, "rb"); if(file) { char buf[512]; while(!feof(file)) { size_t size = fread(buf, 1, 512, file); md.update(buf, size); } fclose(file); md.final(); printf("file md5:%s\n", md.toString().c_str()); return 0; } puts("open file error!"); return 1; }

Sha1算法C++实现代码，117行超短代码参考了rfc3174上的代码和网上某些代码，重写了sha1算法实现，可以计算大文件的hash值，通过64位编译测试，接口简单，代码进行了注释，gcc编译测试通过。 #include <cstdio> #include <string> #ifdef _MSC_VER typedef unsigned int uint32_t; typedef unsigned char uint8_t; typedef unsigned __int64 uint64_t; #else #include <stdint.h> #endif #define SHA1_MAX 2305843009213693952L class Sha1 { uint64_t _size; uint32_t _hashs[5]; uint8_t _block[64]; short _index; public: Sha1(){ init();} ~Sha1(){} void init() { _hashs[0] = 0x67452301; _hashs[1] = 0xefcdab89; _hashs[2] = 0x98badcfe; _hashs[3] = 0x10325476; _hashs[4] = 0xc3d2e1f0; _index = 0; _size = 0; } // 数据更新 bool update(const char *data, size_t len) { while(len--) { _block[_index++] = *data++; //拷贝数据 if(_size > SHA1_MAX) return false; //超出2^64计数范围 _size++; //计数器+1; if(_index == 64){//一个块填充完成则进行hash计算 _hash(); } } return true; } // 结束计算 void final() { _block[_index++] = 0x80;//添加结束府 if(_index > 55){//如果后8位不够填充bit总数，则填0 _fill_zero(64); _hash(); } _fill_zero(56); //最后8字节前填充0 _size <<= 3;//计算总bit数 for(int i = 56; i >=0; i-=8)//最后8字节填充big endian总bit数 _block[_index++] = (uint8_t)(_size >> i); _hash();//结束计算 printf("%8x%8x%8x%8x%8x\n", _hashs[0],_hashs[1],_hashs[2],_hashs[3],_hashs[4]); } private: void _fill_zero(int n) { while(_index < n) _block[_index++]=0; } // sha1核心算法 void _hash() { uint32_tF, K, T = 0; uint32_tt, W[80] = {0}; uint32_tA=_hashs[0],B=_hashs[1],C=_hashs[2],D=_hashs[3],E=_hashs[4]; // 前16个字为原始数据 for (t = 0; t < 16; ++t) { W[t]=(_block[t<<2] ) << 24 |(_block[(t<<2)+1] ) << 16 |(_block[(t<<2)+2] ) << 8 | _block[(t<<2)+3] ; } // 填充 for (t = 16; t < 80; ++t){ W[t] = _lShift(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1); } // 重复80次hash计算 for (t = 0; t < 80; ++t) { switch (t/20) { case 0: K=0x5a827999; F = (B & C) | ((~B) & D); break; case 1: K=0x6ed9eba1;F = B ^ C ^ D; break; case 2: K=0x8f1bbcdc; F = (B & C) | (B & D) | (C & D); break; case 3: K=0xca62c1d6; F = B ^ C ^ D; break; } T = _lShift(A, 5) + F + E + W[t] + K; E=D; D=C; C = _lShift(B, 30); B=A; A=T; } _index = 0; //清零 // 累加计算结果 _hashs[0] += A; _hashs[1] += B; _hashs[2] += C; _hashs[3] += D; _hashs[4] += E; } // 循环左移 template<typename T> T _lShift(T val, int bits) { return (val << bits) | (val >> ((sizeof(T)<<3) - bits)); } }; int main() { Sha1 s; s.update("grape", 5); s.final(); return 0; }

发下代码试试，500行左右的string 无聊，写了一下unicode的string实现。 //删除，插入，查找，替换，取子串, Trim, 迭代器 // 还差替换和 char转换unicode, char或许是utf-8, int 和保留功能有点重合 class String { typedef struct _format_args { int prec; int width; wchar_t fill; }_fargs; public: String() :_size(0), _capacity(_size+1), _data(new wchar_t[_capacity]) { *_data = L'\0'; } // 容器需要 String(const String& rhs) :_size(rhs._size), _capacity(_size+1), _data(new wchar_t[_capacity]) { _copy(_data, rhs._data, _size); } String(const wchar_t *str) { for(_size = 0; str[_size]; ++_size); _capacity = _size + 1; _data = new wchar_t[_capacity]; _copy(_data, str, _size); } String(const wchar_t ch) :_size(1), _capacity(_size+1), _data(new wchar_t[_capacity]) { *_data = ch; *(_data+1) = L'\0'; } //这里有必要么？。 String(int val) :_size(0), _capacity(12), _data(new wchar_t[_capacity]) { bool sign = false; if(val < 0){ sign = true; val = -val; } wchar_t *p = _uitoa(_data, val, 10, sign); _size = _data - p; } String(const char *str) { unsigned int code_page = 936; _size = MultiByteToWideChar(code_page, 0, str, -1, NULL, 0); _capacity = _size; _data = new wchar_t[_size]; MultiByteToWideChar(code_page, 0, str, -1, _data, _size--); } ~String() { delete[] _data; } ////////////////////////////////////////////////////// String& operator=(String rhs) // pass by value { swap(rhs); _size = rhs._size; _capacity = _size + 1; return *this; } String operator+(const String &rhs) { String tmp(_data); //没有进行测同，因为无意义 tmp += rhs; return tmp; } String& operator+=(const String& rhs) { _data = _renew(_data, _size, _size + rhs._size); _copy(_data+_size, rhs._data, rhs._size); _size += rhs._size; _capacity = _size + 1; return *this; } wchar_t& operator[](size_t index) const { return _data[index]; } bool operator==(const String& rhs) { return _cmp(_data, rhs._data) == 0; } bool operator >(const String& rhs) { return _cmp(_data, rhs._data) > 0; } bool operator < (const String& rhs) { return _cmp(_data, rhs._data) < 0; }

【分享2】mp3解码核心代码与文件映射函数在播放中的应用一楼祭度娘。

[分享]整理的JPEG解码代码(700行左右)和学习笔记一楼祭度娘

神魔井社团作品《花千骨》Cos MV 剧，喜欢原文的戳进来看看吧。你还在等电视剧版的花千骨？？不好意思的说句。。别等了。。。。拥有下面这部足矣：视频来自：http://tieba.baidu.com/mo/q/checkurl?url=http%3A%2F%2Fwww.tudou.com%2Fprograms%2Fview%2F3lgKfcPZu80%2F&urlrefer=abda021716052d57e6cdcdc6a6ee99d2 anyone 看哭了有木有？后面花絮看笑了有木有？