*mystring=sdsnewlen("abc",3);**Youcanprintthestringwithprintf()asthereisanimplicit\0atthe*endofthestring.Howeverthestringisbinarysafeandcancontain*\0charactersinthemiddle,asthelengthisstoredinthesdsheader.*/sdssdsnewlen(constvoid*init,size_tinitlen){structsdshdr*sh;if(init){sh=zmalloc(sizeof(structsdshdr)+initlen+1);}else{sh=zcalloc(sizeof(structsdshdr)+initlen+1);}if(sh==NULL)returnNULL;sh->len=initlen;sh->free=0;if(initlen&&init)memcpy(sh->buf,init,initlen);sh->buf[initlen]='\0';return(char*)sh->buf;}/* Create a new sds string starting from a null termined C string. */sdssdsnew(constchar*init){size_tinitlen=(init==NULL)?0:strlen(init);returnsdsnewlen(init,initlen);
从 SDS 的创建逻辑中可以看出
SDS 遵循 C 字符串以空字符('\0')结尾的惯例。
SDS 中的 len 属性(sds.h/sdslen)同 C 字符串函数 strlen 返回结果相同,即不计算尾部空字符。
这样 SDS 就可以直接重用一部分 C 字符串函数库里面的函数(如打印,显示类函数,<stdio.h>/printf),而字符串的修改操作,则使用 SDS 自定义优化后的函数。
常数复杂度获取字符串长度
在 C 语言中,要获取一个 C 字符串的长度,程序必须遍历整个字符串,直到遇到代表字符串结尾的空字符为止,这个操作的复杂度为 O(N)。
但是对于 SDS 来说,获取字符串长度只需要访问 SDS 中的 len 属性。复杂度仅为 O(1),确保了获取字符串长度这样的高频操作不会成为 Redis 性能瓶颈。
对于 SDS 的修改操作,SDS 会实时维护 len 属性,如 sds.c/sdscatlen(将指定长度的二进制安全字符串追加到 SDS 字符串末尾):
/* Append the specified binary-safe string pointed by 't' of 'len' bytes to the
* end of the specified sds string 's'.
*
* After the call, the passed sds string is no longer valid and all the
* references must be substituted with the new pointer returned by the call. */sdssdscatlen(sdss,constvoid*t,size_tlen){structsdshdr*sh;size_tcurlen=sdslen(s);s=sdsMakeRoomFor(s,len);if(s==NULL)returnNULL;sh=(void*)(s-(sizeof(structsdshdr)));memcpy(s+curlen,t,len);sh->len=curlen+len;sh->free=sh->free-len;s[curlen+len]='\0';returns;}/* Append the specified sds 't' to the existing sds 's'.
*
* After the call, the modified sds string is no longer valid and all the
* references must be substituted with the new pointer returned by the call. */sdssdscatsds(sdss,constsdst){returnsdscatlen(s,t,sdslen(t));}
杜绝缓冲区溢出
C 字符串由于不记录自身长度,对其进行修改操作容易造成缓冲区溢出(buffer overflow)。例如 C 字符串拼接函数 <string.h>/strcat:假设内存中有相邻的两个字符串 s1 和 s2,对 s1 做拼接操作时,如果没有提前为 s1 分配足够的空间,s2 保存的内容就会被意外修改。而 SDS 的修改函数(如上面的 sdscatlen)会先调用 sdsMakeRoomFor 检查剩余空间是否足够,不够时先扩容再执行修改:
/* Enlarge the free space at the end of the sds string so that the caller
* is sure that after calling this function can overwrite up to addlen
* bytes after the end of the string, plus one more byte for nul term.
*
* Note: this does not change the *length* of the sds string as returned
* by sdslen(), but only the free buffer space we have. */sdssdsMakeRoomFor(sdss,size_taddlen){structsdshdr*sh,*newsh;size_tfree=sdsavail(s);size_tlen,newlen;if(free>=addlen)returns;len=sdslen(s);sh=(void*)(s-(sizeof(structsdshdr)));newlen=(len+addlen);if(newlen<SDS_MAX_PREALLOC)newlen*=2;elsenewlen+=SDS_MAX_PREALLOC;newsh=zrealloc(sh,sizeof(structsdshdr)+newlen+1);if(newsh==NULL)returnNULL;newsh->free=newlen-len;returnnewsh->buf;}
空间预分配
可以发现,在 SDS API 进行字符串新增逻辑中会给 SDS 重新分配 free 空间。
如果修改后 SDS 的长度(len 属性)小于 SDS_MAX_PREALLOC(1024KB,即 1MB),则会分配和 len 属性同样大小的未使用空间给 buf,这时 SDS 的 len 属性和 free 属性值相同;否则,只额外分配 SDS_MAX_PREALLOC 大小的未使用空间。
/* Remove the part of the string from left and from right composed just of
* contiguous characters found in 'cset', that is a null terminted C string.
*
* After the call, the modified sds string is no longer valid and all the
* references must be substituted with the new pointer returned by the call.
*
* Example:
*
* s = sdsnew("AA...AA.a.aa.aHelloWorld :::");
* s = sdstrim(s,"A. :");
* printf("%s\n", s);
*
* Output will be just "Hello World".
*/sdssdstrim(sdss,constchar*cset){structsdshdr*sh=(void*)(s-(sizeof(structsdshdr)));char*start,*end,*sp,*ep;size_tlen;sp=start=s;ep=end=s+sdslen(s)-1;while(sp<=end&&strchr(cset,*sp))sp++;while(ep>start&&strchr(cset,*ep))ep--;len=(sp>ep)?0:((ep-sp)+1);if(sh->buf!=sp)memmove(sh->buf,sp,len);sh->buf[len]='\0';sh->free=sh->free+(sh->len-len);sh->len=len;returns;}
/* Reallocate the sds string so that it has no free space at the end. The
* contained string remains not altered, but next concatenation operations
* will require a reallocation.
*
* After the call, the passed sds string is no longer valid and all the
* references must be substituted with the new pointer returned by the call. */sdssdsRemoveFreeSpace(sdss){structsdshdr*sh;sh=(void*)(s-(sizeof(structsdshdr)));sh=zrealloc(sh,sizeof(structsdshdr)+sh->len+1);sh->free=0;returnsh->buf;}/* The client query buffer is an sds.c string that can end with a lot of
* free space not used, this function reclaims space if needed.
*
* The function always returns 0 as it never terminates the client. */intclientsCronResizeQueryBuffer(redisClient*c){size_tquerybuf_size=sdsAllocSize(c->querybuf);time_tidletime=server.unixtime-c->lastinteraction;/* There are two conditions to resize the query buffer:
* 1) Query buffer is > BIG_ARG and too big for latest peak.
* 2) Client is inactive and the buffer is bigger than 1k. */if(((querybuf_size>REDIS_MBULK_BIG_ARG)&&(querybuf_size/(c->querybuf_peak+1))>2)||(querybuf_size>1024&&idletime>2)){/* Only resize the query buffer if it is actually wasting space. */if(sdsavail(c->querybuf)>1024){c->querybuf=sdsRemoveFreeSpace(c->querybuf);}}/* Reset the peak again to capture the peak memory usage in the next
* cycle. */c->querybuf_peak=0;return0;}