ftparchive/http/cdrom: align structures to reduce their size in CPU cachelines
This PR decreases the cost of copying, moving, and creating these structures. The gain applies only to common 64-bit processors, because it comes from their 8-byte data alignment.
The smaller a structure or class is, the higher the chance that it fits into the CPU cache. Most processors are already 64-bit, so the change won't make things any worse.
Pahole example output:
- The comment
/* XXX {n} bytes hole, try to pack */
shows where a structure or class can be optimized by rearranging the order of its fields.
Master branch
struct PackageMap {
string BaseDir; /* 0 32 */
string InternalPrefix; /* 32 32 */
/* --- cacheline 1 boundary (64 bytes) --- */
string FLFile; /* 64 32 */
string PkgExt; /* 96 32 */
/* --- cacheline 2 boundary (128 bytes) --- */
string SrcExt; /* 128 32 */
string PkgFile; /* 160 32 */
/* --- cacheline 3 boundary (192 bytes) --- */
string BinCacheDB; /* 192 32 */
string SrcCacheDB; /* 224 32 */
/* --- cacheline 4 boundary (256 bytes) --- */
string BinOverride; /* 256 32 */
string ExtraOverride; /* 288 32 */
/* --- cacheline 5 boundary (320 bytes) --- */
string Arch; /* 320 32 */
bool IncludeArchAll; /* 352 1 */
/* XXX 7 bytes hole, try to pack */
string SrcFile; /* 360 32 */
/* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */
string SrcOverride; /* 392 32 */
string SrcExtraOverride; /* 424 32 */
/* --- cacheline 7 boundary (448 bytes) was 8 bytes ago --- */
bool LongDesc; /* 456 1 */
/* XXX 7 bytes hole, try to pack */
class TranslationWriter * TransWriter; /* 464 8 */
string Contents; /* 472 32 */
string ContentsHead; /* 504 32 */
/* --- cacheline 8 boundary (512 bytes) was 24 bytes ago --- */
string Tag; /* 536 32 */
string PkgCompress; /* 568 32 */
/* --- cacheline 9 boundary (576 bytes) was 24 bytes ago --- */
string CntCompress; /* 600 32 */
string SrcCompress; /* 632 32 */
/* --- cacheline 10 boundary (640 bytes) was 24 bytes ago --- */
string PathPrefix; /* 664 32 */
unsigned int DeLinkLimit; /* 696 4 */
mode_t Permissions; /* 700 4 */
/* --- cacheline 11 boundary (704 bytes) --- */
bool ContentsDone; /* 704 1 */
bool PkgDone; /* 705 1 */
bool SrcDone; /* 706 1 */
/* XXX 5 bytes hole, try to pack */
time_t ContentsMTime; /* 712 8 */
/* size: 720, cachelines: 12, members: 30 */
/* sum members: 701, holes: 3, sum holes: 19 */
/* last cacheline: 16 bytes */
};
struct RequestState {
unsigned int Major; /* 0 4 */
unsigned int Minor; /* 4 4 */
unsigned int Result; /* 8 4 */
char Code[360]; /* 12 360 */
/* XXX 4 bytes hole, try to pack */
/* --- cacheline 5 boundary (320 bytes) was 56 bytes ago --- */
long long unsigned int TotalFileSize; /* 376 8 */
/* --- cacheline 6 boundary (384 bytes) --- */
long long unsigned int DownloadSize; /* 384 8 */
long long unsigned int JunkSize; /* 392 8 */
long long unsigned int StartPos; /* 400 8 */
long long unsigned int MaximumSize; /* 408 8 */
time_t Date; /* 416 8 */
enum HaveContent haveContent; /* 424 4 */
enum {
Closes = 0,
Chunked = 1,
Stream = 2,
} Encoding; /* 428 4 */
enum {
Header = 0,
Data = 1,
} State; /* 432 4 */
/* XXX 4 bytes hole, try to pack */
string Location; /* 440 32 */
/* --- cacheline 7 boundary (448 bytes) was 24 bytes ago --- */
time_t RetryAfter; /* 472 8 */
class FileFd File; /* 480 0 */
/* XXX 96 bytes hole, try to pack */
/* --- cacheline 9 boundary (576 bytes) --- */
const class BaseHttpMethod * Owner; /* 576 8 */
const struct ServerState * Server; /* 584 8 */
/* size: 592, cachelines: 10, members: 18 */
/* sum members: 488, holes: 3, sum holes: 104 */
/* last cacheline: 16 bytes */
};
This PR
struct PackageMap {
string BaseDir; /* 0 32 */
string InternalPrefix; /* 32 32 */
/* --- cacheline 1 boundary (64 bytes) --- */
string FLFile; /* 64 32 */
string PkgExt; /* 96 32 */
/* --- cacheline 2 boundary (128 bytes) --- */
string SrcExt; /* 128 32 */
string PkgFile; /* 160 32 */
/* --- cacheline 3 boundary (192 bytes) --- */
string BinCacheDB; /* 192 32 */
string SrcCacheDB; /* 224 32 */
/* --- cacheline 4 boundary (256 bytes) --- */
string BinOverride; /* 256 32 */
string ExtraOverride; /* 288 32 */
/* --- cacheline 5 boundary (320 bytes) --- */
string SrcFile; /* 320 32 */
string SrcOverride; /* 352 32 */
/* --- cacheline 6 boundary (384 bytes) --- */
string SrcExtraOverride; /* 384 32 */
string Contents; /* 416 32 */
/* --- cacheline 7 boundary (448 bytes) --- */
string ContentsHead; /* 448 32 */
string Tag; /* 480 32 */
/* --- cacheline 8 boundary (512 bytes) --- */
string PkgCompress; /* 512 32 */
string CntCompress; /* 544 32 */
/* --- cacheline 9 boundary (576 bytes) --- */
string SrcCompress; /* 576 32 */
string PathPrefix; /* 608 32 */
/* --- cacheline 10 boundary (640 bytes) --- */
unsigned int DeLinkLimit; /* 640 4 */
mode_t Permissions; /* 644 4 */
string Arch; /* 648 32 */
class TranslationWriter * TransWriter; /* 680 8 */
bool IncludeArchAll; /* 688 1 */
bool LongDesc; /* 689 1 */
bool ContentsDone; /* 690 1 */
bool PkgDone; /* 691 1 */
bool SrcDone; /* 692 1 */
/* XXX 3 bytes hole, try to pack */
time_t ContentsMTime; /* 696 8 */
/* size: 704, cachelines: 11, members: 30 */
/* sum members: 701, holes: 1, sum holes: 3 */
};
struct RequestState {
unsigned int Major; /* 0 4 */
unsigned int Minor; /* 4 4 */
unsigned int Result; /* 8 4 */
char Code[360]; /* 12 360 */
/* --- cacheline 5 boundary (320 bytes) was 52 bytes ago --- */
enum {
Header = 0,
Data = 1,
} State; /* 372 4 */
long long unsigned int TotalFileSize; /* 376 8 */
/* --- cacheline 6 boundary (384 bytes) --- */
long long unsigned int DownloadSize; /* 384 8 */
long long unsigned int JunkSize; /* 392 8 */
long long unsigned int StartPos; /* 400 8 */
long long unsigned int MaximumSize; /* 408 8 */
time_t Date; /* 416 8 */
enum HaveContent haveContent; /* 424 4 */
enum {
Closes = 0,
Chunked = 1,
Stream = 2,
} Encoding; /* 428 4 */
string Location; /* 432 32 */
/* --- cacheline 7 boundary (448 bytes) was 16 bytes ago --- */
time_t RetryAfter; /* 464 8 */
class FileFd File; /* 472 0 */
const struct ServerState * Server; /* 472 8 */
/* XXX 88 bytes hole, try to pack */
/* --- cacheline 8 boundary (512 bytes) was 56 bytes ago --- */
const class BaseHttpMethod * Owner; /* 568 8 */
/* size: 576, cachelines: 9, members: 18 */
/* sum members: 488, holes: 1, sum holes: 88 */
}; /* saved 16 bytes and 1 cacheline! */
Hint:
The pahole struct/class analyzer reported that RequestState could be reduced by 16 bytes,
but I was only able to save 8 bytes. If someone can suggest a better field ordering that saves another 8 bytes, this will save 1 cacheline in the HTTP network stack of the APT project.
Info about technique:
https://hpc.rz.rptu.de/Tutorials/AVX/alignment.shtml
https://en.wikipedia.org/wiki/Data_structure_alignment
https://stackoverflow.com/a/20882083
https://zijishi.xyz/post/optimization-technique/learning-to-use-data-alignment/
Affected structs:
- PackageMap 720 -> 704 bytes (saves 16 bytes, i.e. 1 full CPU cacheline)
- RequestState 592 -> 584 bytes
- ServerState 384 -> 376 bytes
- CDROMMethod 320 -> 312 bytes
- FTWScanner 168 -> 160 bytes