@TechReport{ it:2004-005,
  author      = {H{\aa}kan Zeffer and Zoran Radovic and Oskar Grenholm and Erik Hagersten},
  title       = {Evaluation, Implementation and Performance of Write Permission Caching in the {DSZOOM} System},
  institution = {Department of Information Technology, Uppsala University},
  department  = {Division of Computer Systems},
  year        = {2004},
  number      = {2004-005},
  month       = feb,
  note        = {Updated June 2004},
  abstract    = {Fine-grained software-based distributed shared memory (SW-DSM) systems typically maintain coherence with in-line checking code at load and store operations to shared memory. The instrumentation overhead of this added checking code can be severe. This paper (1) shows that most of the instrumentation overhead in the fine-grained DSZOOM SW-DSM system is store related, (2) introduces a new write permission cache (WPC) technique that exploits spatial store locality and batches coherence actions at runtime, (3) evaluates WPC and (4) presents WPC results when implemented in a real SW-DSM system. On average, the WPC reduces the store instrumentation overhead in DSZOOM with 42~(67) percent for benchmarks compiled with maximum (minimum) compiler optimizations.}
}