Thread-level parallelism is an increasingly popular target for improving computer system performance; architectures such as chip multiprocessors and multithreaded cores are designed to take advantage of parallel threads within a single chip. The performance of existing single-threaded programs can be improved with automatic parallelization and thread-level speculation; however, overheads associated with speculation can be a major hurdle towards achieving significant performance gains. This thesis investigates module-level parallelism, or spawning a speculative thread running the module continuation in parallel with the called module. The results of simulations with an ideal speculative chip multiprocessor show that execution time can potentially be, on average, halved with module-level parallelism, but that misspeculations are common and that speculation overheads dominate the execution time if new threads are started for every module continuation. The second technique is misspeculation prediction and is aimed at bringing down total overhead. The selection of module calls used for speculation is based on whether or not spawning a new thread is expected to result in a misspeculation. When spawning threads for all continuations the total overhead is on average 336% extra cycles compared to sequential execution; with misspeculation prediction, the average overhead can be brought down to 54%. The proposed techniques can be applied in run-time to speed up existing applications on chip multiprocessors with thread-level speculation support.
% Licentiate thesis by Fredrik Warg, Chalmers University of Technology, 2003.
% The thesis kind is carried by the type field (it overrides the default
% thesis label of the style) rather than being hard-coded into the title.
% Percent signs in the abstract are escaped so that the text stays intact
% if a style typesets the abstract.
@phdthesis{MLSpecExec_lic,
  author   = {Warg, Fredrik},
  title    = {Module-Level Speculative Execution Techniques on Chip Multiprocessors},
  type     = {Licentiate Thesis},
  school   = {Chalmers University of Technology},
  year     = {2003},
  month    = jun,
  abstract = {Thread-level parallelism is an increasingly popular target for
              improving computer system performance; architectures such as chip
              multiprocessors and multithreaded cores are designed to take
              advantage of parallel threads within a single chip. The
              performance of existing single-threaded programs can be improved
              with automatic parallelization and thread-level speculation;
              however, overheads associated with speculation can be a major
              hurdle towards achieving significant performance gains. This
              thesis investigates module-level parallelism, or spawning a
              speculative thread running the module continuation in parallel
              with the called module. The results of simulations with an ideal
              speculative chip multiprocessor show that execution time can
              potentially be, on average, halved with module-level parallelism,
              but that misspeculations are common and that speculation
              overheads dominate the execution time if new threads are started
              for every module continuation. The second technique is
              misspeculation prediction and is aimed at bringing down total
              overhead. The selection of module calls used for speculation is
              based on whether or not spawning a new thread is expected to
              result in a misspeculation. When spawning threads for all
              continuations the total overhead is on average 336\% extra cycles
              compared to sequential execution; with misspeculation prediction,
              the average overhead can be brought down to 54\%. The proposed
              techniques can be applied in run-time to speed up existing
              applications on chip multiprocessors with thread-level
              speculation support.},
  keywords = {Chip multiprocessors, thread-level speculation, module-level parallelism, module run-length prediction, misspeculation prediction, value prediction},
  note     = {Publication data: https://warg.org/fredrik/publ/},
}