User Tools

Site Tools


melange:papers:fall2015

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

melange:papers:fall2015 [2015/11/10 09:40] (current)
Line 1: Line 1:
 +@inproceedings{Acharya:2015:PNM:2688500.2688512,
 + author = {Acharya, Aravind and Bondhugula, Uday},
 + title = {PLUTO+: Near-complete Modeling of Affine Transformations for
 +Parallelism and Locality},
 + booktitle = {Proceedings of the 20th ACM SIGPLAN Symposium on Principles and
 +Practice of Parallel Programming},
 + series = {PPoPP 2015},
 + year = {2015},
 + isbn = {978-1-4503-3205-7},
 + location = {San Francisco, CA, USA},
 + pages = {54--64},
 + numpages = {11},
 + url = {http://doi.acm.org/10.1145/2688500.2688512},
 + doi = {10.1145/2688500.2688512},
 + acmid = {2688512},
 + publisher = {ACM},
 + address = {New York, NY, USA},
 + keywords = {Affine transformations, affine scheduling, automatic
 +parallelization, polyhedral model, stencil computations, tiling},
 +
 +
 +@INPROCEEDINGS{7161519, 
 + author={Tithi, J.J. and Ganapathi, P. and Talati, A. and Aggarwal, S. and Chowdhury, R.}, 
 + booktitle={Parallel and Distributed Processing Symposium (IPDPS), 2015 IEEE International}, 
 + title={High-Performance Energy-Efficient Recursive Dynamic Programming with Matrix-Multiplication-Like Flexible Kernels}, 
 + year={2015}, 
 + month={May}, 
 + pages={303-312}, 
 + keywords={divide and conquer methods;dynamic programming;mathematics computing;matrix multiplication;parallel algorithms;DP problem;FW-APSP;Floyd-Warshall all-pairs shortest path;cache-oblivious recursive divide-and-conquer;dynamic programming;gap penalty;high-performing parallel implementation;matrix-multiplication-like flexible kernel;optimization;parallel CORDAC algorithm;cache-oblivious;divide-and-conquer;dynamic programming;flexible kernel;polyhedral compiler;recursive}, 
 + doi={10.1109/IPDPS.2015.107}, 
 + ISSN={1530-2075},
 +}
 +
 +@inproceedings{Bondhugula:2014:TOT:2628071.2628106,
 + author = {Bondhugula, Uday and Bandishti, Vinayaka and Cohen, Albert and Potron, Guillain and Vasilache, Nicolas},
 + title = {Tiling and Optimizing Time-iterated Computations on Periodic Domains},
 + booktitle = {Proceedings of the 23rd International Conference on Parallel Architectures and Compilation},
 + series = {PACT '14},
 + year = {2014},
 + isbn = {978-1-4503-2809-8},
 + location = {Edmonton, AB, Canada},
 + pages = {39--50},
 + numpages = {12},
 + url = {http://doi.acm.org/10.1145/2628071.2628106},
 + doi = {10.1145/2628071.2628106},
 + acmid = {2628106},
 + publisher = {ACM},
 + address = {New York, NY, USA},
 + keywords = {automatic parallelization, periodic, polyhedral model, stencils, tiling},
 +
 +
 +
 +@inproceedings{Tang:2015:CWI:2688500.2688514,
 + author = {Tang, Yuan and You, Ronghui and Kan, Haibin and Tithi, Jesmin Jahan and Ganapathi, Pramod and Chowdhury, Rezaul A.},
 + title = {Cache-oblivious Wavefront: Improving Parallelism of Recursive Dynamic Programming Algorithms Without Losing Cache-efficiency},
 + booktitle = {Proceedings of the 20th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},
 + series = {PPoPP 2015},
 + year = {2015},
 + isbn = {978-1-4503-3205-7},
 + location = {San Francisco, CA, USA},
 + pages = {205--214},
 + numpages = {10},
 + url = {http://doi.acm.org/10.1145/2688500.2688514},
 + doi = {10.1145/2688500.2688514},
 + acmid = {2688514},
 + publisher = {ACM},
 + address = {New York, NY, USA},
 + keywords = {Cilk, cache-oblivious parallel algorithms, cache-oblivious wavefront, dynamic programming, multi-core, nested parallel computation},
 +}
 +
 +
 +@inproceedings{Huang:2014:ARD:2628071.2628089,
 + author = {Huang, Cheng-Chieh and Nagarajan, Vijay},
 + title = {ATCache: Reducing DRAM Cache Latency via a Small SRAM Tag Cache},
 + booktitle = {Proceedings of the 23rd International Conference on Parallel Architectures and Compilation},
 + series = {PACT '14},
 + year = {2014},
 + isbn = {978-1-4503-2809-8},
 + location = {Edmonton, AB, Canada},
 + pages = {51--60},
 + numpages = {10},
 + url = {http://doi.acm.org/10.1145/2628071.2628089},
 + doi = {10.1145/2628071.2628089},
 + acmid = {2628089},
 + publisher = {ACM},
 + address = {New York, NY, USA},
 + keywords = {design, dram cache, performance},
 +}
 +
 +@inproceedings{Fatehi:2014:ITS:2628071.2628093,
 + author = {Fatehi, Ehsan and Gratz, Paul},
 + title = {ILP and TLP in Shared Memory Applications: A Limit Study},
 + booktitle = {Proceedings of the 23rd International Conference on Parallel Architectures and Compilation},
 + series = {PACT '14},
 + year = {2014},
 + isbn = {978-1-4503-2809-8},
 + location = {Edmonton, AB, Canada},
 + pages = {113--126},
 + numpages = {14},
 + url = {http://doi.acm.org/10.1145/2628071.2628093},
 + doi = {10.1145/2628071.2628093},
 + acmid = {2628093},
 + publisher = {ACM},
 + address = {New York, NY, USA},
 + keywords = {instruction-level parallelism (ilp), limits, pthreads, thread-level parallelism (tlp)},
 +
 +
 +
 +@inproceedings{Cameron:2014:BDP:2628071.2628079,
 + author = {Cameron, Robert D. and Shermer, Thomas C. and Shriraman, Arrvindh and Herdy, Kenneth S. and Lin, Dan and Hull, Benjamin R. and Lin, Meng},
 + title = {Bitwise Data Parallelism in Regular Expression Matching},
 + booktitle = {Proceedings of the 23rd International Conference on Parallel Architectures and Compilation},
 + series = {PACT '14},
 + year = {2014},
 + isbn = {978-1-4503-2809-8},
 + location = {Edmonton, AB, Canada},
 + pages = {139--150},
 + numpages = {12},
 + url = {http://doi.acm.org/10.1145/2628071.2628079},
 + doi = {10.1145/2628071.2628079},
 + acmid = {2628079},
 + publisher = {ACM},
 + address = {New York, NY, USA},
 + keywords = {parallel bit streams, regular expression matching},
 +}
 +
 +@inproceedings{Ansel:2014:OEF:2628071.2628092,
 + author = {Ansel, Jason and Kamil, Shoaib and Veeramachaneni, Kalyan and Ragan-Kelley, Jonathan and Bosboom, Jeffrey and O'Reilly, Una-May and Amarasinghe, Saman},
 + title = {OpenTuner: An Extensible Framework for Program Autotuning},
 + booktitle = {Proceedings of the 23rd International Conference on Parallel Architectures and Compilation},
 + series = {PACT '14},
 + year = {2014},
 + isbn = {978-1-4503-2809-8},
 + location = {Edmonton, AB, Canada},
 + pages = {303--316},
 + numpages = {14},
 + url = {http://doi.acm.org/10.1145/2628071.2628092},
 + doi = {10.1145/2628071.2628092},
 + acmid = {2628092},
 + publisher = {ACM},
 + address = {New York, NY, USA},
 + keywords = {autotuner, optimization},
 +}
 +
 +
 +@INPROCEEDINGS{6835968, 
 + author={Hayenga, M. and Naresh, V.R.K. and Lipasti, M.H.}, 
 + booktitle={High Performance Computer Architecture (HPCA), 2014 IEEE 20th International Symposium}, 
 + title={Revolver: Processor architecture for power efficient loop execution}, 
 + year={2014}, 
 + month={Feb}, 
 + pages={591-602}, 
 + keywords={computer architecture;energy conservation;instruction sets;power aware computing;Revolver architecture;branch prediction;dispatch logic;energy efficiency;frontend instruction dispatches;instruction fetch;loop buffers;loop execution;loop iterations;micro-op cache techniques;out-of-order execution core;out-of-order processor architecture;power efficient loop execution;processor core;processor frontend;static instruction instances;Arrays;Clocks;Out of order;Pipelines;Rain;Registers;Resource management}, 
 + doi={10.1109/HPCA.2014.6835968},
 +}
 +
 +
 +@INPROCEEDINGS{5377644, 
 + author={Shafiq, M. and Pericas, M. and de la Cruz, R. and Araya-Polo, M. and Navarro, N. and Ayguade, E.}, 
 + booktitle={Field-Programmable Technology, 2009. FPT 2009.}, 
 + title={Exploiting memory customization in FPGA for 3D stencil computations}, 
 + year={2009}, 
 + month={Dec}, 
 + pages={38-45}, 
 + keywords={field programmable gate arrays;signal processing;3D stencil computations;FPGA;IBM PowerXCell 8i;data reuse;memory customization;memory organization;memory-bound kernels;Bandwidth;Computer applications;Field programmable gate arrays;Finite difference methods;Finite impulse response filter;Hardware;Kernel;Nearest neighbor searches;Throughput;Time domain analysis}, 
 + doi={10.1109/FPT.2009.5377644},
 +}
 +
 +
 +@inproceedings{Wahib:2015:AGK:2749246.2749255,
 + author = {Wahib, Mohamed and Maruyama, Naoya},
 + title = {Automated GPU Kernel Transformations in Large-Scale Production Stencil Applications},
 + booktitle = {Proceedings of the 24th International Symposium on High-Performance Parallel and Distributed Computing},
 + series = {HPDC '15},
 + year = {2015},
 + isbn = {978-1-4503-3550-8},
 + location = {Portland, Oregon, USA},
 + pages = {259--270},
 + numpages = {12},
 + url = {http://doi.acm.org/10.1145/2749246.2749255},
 + doi = {10.1145/2749246.2749255},
 + acmid = {2749255},
 + publisher = {ACM},
 + address = {New York, NY, USA},
 + keywords = {cuda, gpu, source-to-source translation, stencil computations},
 +}
 +
 +
 +@inproceedings{Benson:2015:FPP:2688500.2688513,
 + author = {Benson, Austin R. and Ballard, Grey},
 + title = {A Framework for Practical Parallel Fast Matrix Multiplication},
 + booktitle = {Proceedings of the 20th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},
 + series = {PPoPP 2015},
 + year = {2015},
 + isbn = {978-1-4503-3205-7},
 + location = {San Francisco, CA, USA},
 + pages = {42--53},
 + numpages = {12},
 + url = {http://doi.acm.org/10.1145/2688500.2688513},
 + doi = {10.1145/2688500.2688513},
 + acmid = {2688513},
 + publisher = {ACM},
 + address = {New York, NY, USA},
 + keywords = {dense linear algebra, fast matrix multiplication, parallel linear algebra, shared memory},
 +}
 +
 +
 +@inproceedings{Sukhija:2014:PSR:2672598.2672904,
 + author = {Sukhija, Nitin and Malone, Brandon and Srivastava, Srishti and Banicescu, Ioana and Ciorba, Florina M.},
 + title = {Portfolio-Based Selection of Robust Dynamic Loop Scheduling Algorithms Using Machine Learning},
 + booktitle = {Proceedings of the 2014 IEEE International Parallel \& Distributed Processing Symposium Workshops},
 + series = {IPDPSW '14},
 + year = {2014},
 + isbn = {978-1-4799-4116-2},
 + pages = {1638--1647},
 + numpages = {10},
 + url = {http://dx.doi.org/10.1109/IPDPSW.2014.183},
 + doi = {10.1109/IPDPSW.2014.183},
 + acmid = {2672904},
 + publisher = {IEEE Computer Society},
 + address = {Washington, DC, USA},
 + keywords = {Dynamic loop scheduling, robustness, algorithm selection, empirical robustness prediction models, machine learning techniques, variable system availability},
 +}
 +
 +@incollection{Tiwari2014,
 +year={2014},
 +isbn={978-3-319-09872-2},
 +booktitle={Euro-Par 2014 Parallel Processing},
 +series={Euro-Par 2014 Parallel Processing},
 +volume={8632},
 +editor={Silva, Fernando and Dutra, Ins and Santos Costa, Vtor},
 +doi={10.1007/978-3-319-09873-9_6},
 +title={Modeling the Impact of Reduced Memory Bandwidth on HPC Applications},
 +url={http://dx.doi.org/10.1007/978-3-319-09873-9_6},
 +publisher={Springer International Publishing},
 +author={Tiwari, Ananta and Gamst, Anthony and Laurenzano, MichaelA. and Schulz, Martin and Carrington, Laura},
 +pages={63-74},
 +language={English}
 +}
 +
 +
 +@INPROCEEDINGS{7056046, 
 + author={Agarwal, N. and Nellans, D. and O'Connor, M. and Keckler, S.W. and Wenisch, T.F.}, 
 + booktitle={High Performance Computer Architecture (HPCA), 2015 IEEE 21st International Symposium}, 
 + title={Unlocking bandwidth for GPUs in CC-NUMA systems}, 
 + year={2015}, 
 + month={Feb}, 
 + pages={354-365}, 
 + keywords={cache storage;graphics processing units;parallel processing;storage management;CC-NUMA GPU-CPU systems;CPU memory bandwidth;GDDR memory;GPU kernel;GPU memory bandwidth;GPU relaxed memory semantics;GPU-based HPC applications;aggressive memory prefetching;bandwidth balancing;hardware cache-coherence;memory-intensive GPU workloads;minimal hardware support;on-demand software page migration;oracular page placement;software runtime system;virtual address-based program locality;Bandwidth;Graphics processing units;Hardware;Memory management;Random access memory;Runtime}, 
 + doi={10.1109/HPCA.2015.7056046},
 +}
 +
 +
 +@INPROCEEDINGS{6270616, 
 + author={Changyou Zhang and Kun Huang and Xiang Cui and Yifeng Chen}, 
 + booktitle={Parallel and Distributed Processing Symposium Workshops PhD Forum (IPDPSW), 2012 IEEE 26th International}, 
 + title={Power-aware Programming with GPU Accelerators}, 
 + year={2012}, 
 + month={May}, 
 + pages={2443-2449}, 
 + keywords={graphics processing units;multi-threading;multiprocessing systems;power aware computing;ubiquitous computing;GPU accelerators;high-level program development;manycore processor;multithreaded processor;on-chip parallelism;parallel processor;power consumption values;power efficiency;power estimation;power-aware programming;processor computational power;processor memory bandwidth;program statements;ubiquitous computing;Bandwidth;Graphics processing unit;Hardware;Memory management;Message systems;Power demand;Power measurement;GPU;Power-aware;Primitive;Programming}, 
 + doi={10.1109/IPDPSW.2012.301},
 +}
 +
 +
 +@inproceedings{Fang:2014:TIX:2568088.2576799,
 + author = {Fang, Jianbin and Sips, Henk and Zhang, LiLun and Xu, Chuanfu and Che, Yonggang and Varbanescu, Ana Lucia},
 + title = {Test-driving Intel Xeon Phi},
 + booktitle = {Proceedings of the 5th ACM/SPEC International Conference on Performance Engineering},
 + series = {ICPE '14},
 + year = {2014},
 + isbn = {978-1-4503-2733-6},
 + location = {Dublin, Ireland},
 + pages = {137--148},
 + numpages = {12},
 + url = {http://doi.acm.org/10.1145/2568088.2576799},
 + doi = {10.1145/2568088.2576799},
 + acmid = {2576799},
 + publisher = {ACM},
 + address = {New York, NY, USA},
 + keywords = {experience with xeon phi, microbenchmarking, optimization, performance analysis},
 +
 +
 +                                              
 +@inproceedings{Ravishankar:2015:DMC:2688500.2688515,
 + author = {Ravishankar, Mahesh and Dathathri, Roshan and Elango, Venmugil and Pouchet, Louis-Noël and Ramanujam, J. and Rountev, Atanas and Sadayappan, P.},
 + title = {Distributed Memory Code Generation for Mixed Irregular/Regular Computations},
 + booktitle = {Proceedings of the 20th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},
 + series = {PPoPP 2015},
 + year = {2015},
 + isbn = {978-1-4503-3205-7},
 + location = {San Francisco, CA, USA},
 + pages = {65--75},
 + numpages = {11},
 + url = {http://doi.acm.org/10.1145/2688500.2688515},
 + doi = {10.1145/2688500.2688515},
 + acmid = {2688515},
 + publisher = {ACM},
 + address = {New York, NY, USA},
 + keywords = {Distributed Memory, Inspector/Executor, Irregular Computation, Polyhedral Compilation},
 +}
 +
 +
 +@inproceedings{Cong:2014:OMS:2593069.2593090,
 + author = {Cong, Jason and Li, Peng and Xiao, Bingjun and Zhang, Peng},
 + title = {An Optimal Microarchitecture for Stencil Computation Acceleration Based on Non-Uniform Partitioning of Data Reuse Buffers},
 + booktitle = {Proceedings of the 51st Annual Design Automation Conference},
 + series = {DAC '14},
 + year = {2014},
 + isbn = {978-1-4503-2730-5},
 + location = {San Francisco, CA, USA},
 + pages = {77:1--77:6},
 + articleno = {77},
 + numpages = {6},
 + url = {http://doi.acm.org/10.1145/2593069.2593090},
 + doi = {10.1145/2593069.2593090},
 + acmid = {2593090},
 + publisher = {ACM},
 + address = {New York, NY, USA},
 +}
 +
 +@ARTICLE{6470606, 
 + author={Sano, K. and Hatsuda, Y. and Yamamoto, S.}, 
 + journal={Parallel and Distributed Systems, IEEE Transactions}, 
 + title={Multi-FPGA Accelerator for Scalable Stencil Computation with Constant Memory Bandwidth}, 
 + year={2014}, 
 + month={March}, 
 + volume={25}, 
 + number={3}, 
 + pages={695-705}, 
 + keywords={field programmable gate arrays;parallel processing;storage management;CCM;GPU;Jacobi computation;SSA;custom computing machine;deep pipelining approach;domain-specific programmable concept;field programmable gate array;graphics processing unit;high-performance stencil computations;memory bandwidth;multiFPGA accelerator;multicore microprocessors;scalable stencil computation;scalable streaming-array;scientific computations;Arrays;Bandwidth;Computational modeling;Field programmable gate arrays;Hardware;Scalability;FPGA;Scalable streaming-array;custom computing machine;high-performance computation;stencil computation}, 
 + doi={10.1109/TPDS.2013.51}, 
 + ISSN={1045-9219},
 +}
 +
 +
 +
 +@inproceedings{Fan:2006:IHE:1176254.1176322,
 + author = {Fan, Kevin and Kudlur, Manjunath and Park, Hyunchul and Mahlke,
 +Scott},
 + title = {Increasing Hardware Efficiency with Multifunction Loop
 +Accelerators},
 + booktitle = {Proceedings of the 4th International Conference on
 +Hardware/Software Codesign and System Synthesis},
 + series = {CODES+ISSS '06},
 + year = {2006},
 + isbn = {1-59593-370-0},
 + location = {Seoul, Korea},
 + pages = {276--281},
 + numpages = {6},
 + url = {http://doi.acm.org/10.1145/1176254.1176322},
 + doi = {10.1145/1176254.1176322},
 + acmid = {1176322},
 + publisher = {ACM},
 + address = {New York, NY, USA},
 + keywords = {application-specific hardware, high-level synthesis, loop
 +accelerator, modulo scheduling, multifunction design},
 +}
 +
 +@article{Meswani:2013:MPP:2493921.2493922,
 + author = {Meswani, Mitesh R. and Carrington, Laura and Unat, Didem and
 +Snavely, Allan and Baden, Scott and Poole, Stephen},
 + title = {Modeling and Predicting Performance of High Performance Computing
 +Applications on Hardware Accelerators},
 + journal = {Int. J. High Perform. Comput. Appl.},
 + issue_date = {May       2013},
 + volume = {27},
 + number = {2},
 + month = may,
 + year = {2013},
 + issn = {1094-3420},
 + pages = {89--108},
 + numpages = {20},
 + url = {http://dx.doi.org/10.1177/1094342012468180},
 + doi = {10.1177/1094342012468180},
 + acmid = {2493922},
 + publisher = {Sage Publications, Inc.},
 + address = {Thousand Oaks, CA, USA},
 + keywords = {FPGA, GPU, HPC, accelerators, benchmarking, idioms, performance
 +modeling, performance prediction},
 +}
 +
 +
 +@article{Nery:2013:HRM:2537182.2537569,
 + author = {Nery, Alexandre S. and Jozwiak, Lech and Lindwer, Menno and Cocco, Mauro and Nedjah, Nadia and Franca, Felipe M. G.},
 + title = {Hardware Reuse in Modern Application-specific Processors and
 +Accelerators},
 + journal = {Microprocess. Microsyst.},
 + issue_date = {August, 2013},
 + volume = {37},
 + number = {6-7},
 + month = aug,
 + year = {2013},
 + issn = {0141-9331},
 + pages = {684--692},
 + numpages = {9},
 + url = {http://dx.doi.org/10.1016/j.micpro.2012.06.005},
 + doi = {10.1016/j.micpro.2012.06.005},
 + acmid = {2537569},
 + publisher = {Elsevier Science Publishers B. V.},
 + address = {Amsterdam, The Netherlands, The Netherlands},
 + keywords = {Application-specific processors, Area reduction, Hardware
 +accelerator, Power reduction, Resource sharing},
 +}
 +
 + 
 +
 +@inproceedings{Bandishti:2012:TSC:2388996.2389051,
 + author = {Bandishti, Vinayaka and Pananilath, Irshad and Bondhugula, Uday},
 + title = {Tiling Stencil Computations to Maximize Parallelism},
 + booktitle = {Proceedings of the International Conference on High Performance
 +Computing, Networking, Storage and Analysis},
 + series = {SC '12},
 + year = {2012},
 + isbn = {978-1-4673-0804-5},
 + location = {Salt Lake City, Utah},
 + pages = {40:1--40:11},
 + articleno = {40},
 + numpages = {11},
 + url = {http://dl.acm.org/citation.cfm?id=2388996.2389051},
 + acmid = {2389051},
 + publisher = {IEEE Computer Society Press},
 + address = {Los Alamitos, CA, USA},
 + keywords = {compilers, program transformation},
 +}
 +
 +
 +@inproceedings{Wonnacott13,
 +    Author = {Dave G. Wonnacott and Michelle Mills Strout},
 +    Booktitle = {Proceedings of the 3rd International Workshop on Polyhedral Compilation Techniques (IMPACT)}, 
 + series = {IMPACT 2013},
 +    Month = {January},
 +    Title = {On the Scalability of Loop Tiling Techniques},
 +    Year = {2013},
 +    url={http://impact.gforge.inria.fr/impact2013/papers/impact2013_on_the_scalability_of_loop_tiling_techniques.pdf}
 +}
 +
 +
 +@inproceedings{Pugh:1991:OTF:125826.125848,
 + author = {Pugh, William},
 + title = {The Omega Test: A Fast and Practical Integer Programming Algorithm
 +for Dependence Analysis},
 + booktitle = {Proceedings of the 1991 ACM/IEEE Conference on Supercomputing},
 + series = {Supercomputing '91},
 + year = {1991},
 + isbn = {0-89791-459-7},
 + location = {Albuquerque, New Mexico, USA},
 + pages = {4--13},
 + numpages = {10},
 + url = {http://doi.acm.org/10.1145/125826.125848},
 + doi = {10.1145/125826.125848},
 + acmid = {125848},
 + publisher = {ACM},
 + address = {New York, NY, USA},
 +}
 +
 +
 +@inproceedings{cohenautomatic,
 + title={Automatic Intra-Array Storage Optimization},
 + author={Bhaskaracharya, Somashekaracharya G and Bondhugula, Uday and Cohen, Albert },
 + url={http://www.csa.iisc.ernet.in/TR/2014/3/paper.pdf},
 + series={IISc-CSA-TR-2014-3, Nov 2014 and submitted to ACM TOPLAS, Feb 2015},
 + year={2015},
 +    }
 +
 +@inproceedings{Stock:2014:FED:2594291.2594342,
 + author = {Stock, Kevin and Kong, Martin and Grosser, Tobias and Pouchet,
 +Louis-Noël and Rastello, Fabrice and Ramanujam, J. and Sadayappan, P.},
 + title = {A Framework for Enhancing Data Reuse via Associative Reordering},
 + booktitle = {Proceedings of the 35th ACM SIGPLAN Conference on Programming
 +Language Design and Implementation},
 + series = {PLDI '14},
 + year = {2014},
 + isbn = {978-1-4503-2784-8},
 + location = {Edinburgh, United Kingdom},
 + pages = {65--76},
 + numpages = {12},
 + url = {http://doi.acm.org/10.1145/2594291.2594342},
 + doi = {10.1145/2594291.2594342},
 + acmid = {2594342},
 + publisher = {ACM},
 + address = {New York, NY, USA},
 +}
 +
 +@article{Cilardo:2015:IMM:2695583.2675359,
 + author = {Cilardo, Alessandro and Gallo, Luca},
 + title = {Improving Multibank Memory Access Parallelism with Lattice-Based
 +Partitioning},
 + journal = {ACM Trans. Archit. Code Optim.},
 + issue_date = {January 2015},
 + volume = {11},
 + number = {4},
 + month = jan,
 + year = {2015},
 + issn = {1544-3566},
 + pages = {45:1--45:25},
 + articleno = {45},
 + numpages = {25},
 + url = {http://doi.acm.org/10.1145/2675359},
 + doi = {10.1145/2675359},
 + acmid = {2675359},
 + publisher = {ACM},
 + address = {New York, NY, USA},
 + keywords = {Memory partitioning, field-programmable gate arrays, fine-grained
 +distributed shared memory, polyhedral model},
 +}
  
melange/papers/fall2015.txt · Last modified: 2015/11/10 09:40 (external edit)