User Tools

Site Tools


melange:papers:fall2015

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

melange:papers:fall2015 [2015/11/10 09:40] (current)
Line 1: Line 1:
 +@inproceedings{Acharya:​2015:​PNM:​2688500.2688512,​
 + ​author = {Acharya, Aravind and Bondhugula, Uday},
 + title = {PLUTO+: Near-complete Modeling of Affine Transformations for
 +Parallelism and Locality},
 + ​booktitle = {Proceedings of the 20th ACM SIGPLAN Symposium on Principles and
 +Practice of Parallel Programming},​
 + ​series = {PPoPP 2015},
 + year = {2015},
 + isbn = {978-1-4503-3205-7},​
 + ​location = {San Francisco, CA, USA},
 + pages = {54--64},
 + ​numpages = {11},
 + url = {http://​doi.acm.org/​10.1145/​2688500.2688512},​
 + doi = {10.1145/​2688500.2688512},​
 + acmid = {2688512},
 + ​publisher = {ACM},
 + ​address = {New York, NY, USA},
 + ​keywords = {Affine transformations,​ affine scheduling, automatic
 +parallelization,​ polyhedral model, stencil computations,​ tiling},
 +
 +
 +@INPROCEEDINGS{7161519, ​
 + ​author={Tithi,​ J.J. and Ganapathi, P. and Talati, A. and Aggarwal, S. and Chowdhury, R.}, 
 + ​booktitle={Parallel and Distributed Processing Symposium (IPDPS), 2015 IEEE International}, ​
 + ​title={High-Performance Energy-Efficient Recursive Dynamic Programming with Matrix-Multiplication-Like Flexible Kernels}, ​
 + ​year={2015}, ​
 + ​month={May}, ​
 + ​pages={303-312}, ​
 + ​keywords={divide and conquer methods;​dynamic programming;​mathematics computing;​matrix multiplication;​parallel algorithms;​DP problem;​FW-APSP;​Floyd-Warshall all-pairs shortest path;​cache-oblivious recursive divide-and-conquer;​dynamic programming;​gap penalty;​high-performing parallel implementation;​matrix-multiplication-like flexible kernel;​optimization;​parallel CORDAC algorithm;​cache-oblivious;​divide-and-conquer;​dynamic programming;​flexible kernel;​polyhedral compiler;​recursive}, ​
 + ​doi={10.1109/​IPDPS.2015.107}, ​
 + ​ISSN={1530-2075},​
 +}
 +
 +@inproceedings{Bondhugula:​2014:​TOT:​2628071.2628106,​
 + ​author = {Bondhugula,​ Uday and Bandishti, Vinayaka and Cohen, Albert and Potron, Guillain and Vasilache, Nicolas},
 + title = {Tiling and Optimizing Time-iterated Computations on Periodic Domains},
 + ​booktitle = {Proceedings of the 23rd International Conference on Parallel Architectures and Compilation},​
 + ​series = {PACT '14},
 + year = {2014},
 + isbn = {978-1-4503-2809-8},​
 + ​location = {Edmonton, AB, Canada},
 + pages = {39--50},
 + ​numpages = {12},
 + url = {http://​doi.acm.org/​10.1145/​2628071.2628106},​
 + doi = {10.1145/​2628071.2628106},​
 + acmid = {2628106},
 + ​publisher = {ACM},
 + ​address = {New York, NY, USA},
 + ​keywords = {automatic parallelization,​ periodic, polyhedral model, stencils, tiling},
 +
 +
 +
 +@inproceedings{Tang:​2015:​CWI:​2688500.2688514,​
 + ​author = {Tang, Yuan and You, Ronghui and Kan, Haibin and Tithi, Jesmin Jahan and Ganapathi, Pramod and Chowdhury, Rezaul A.},
 + title = {Cache-oblivious Wavefront: Improving Parallelism of Recursive Dynamic Programming Algorithms Without Losing Cache-efficiency},​
 + ​booktitle = {Proceedings of the 20th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},​
 + ​series = {PPoPP 2015},
 + year = {2015},
 + isbn = {978-1-4503-3205-7},​
 + ​location = {San Francisco, CA, USA},
 + pages = {205--214},
 + ​numpages = {10},
 + url = {http://​doi.acm.org/​10.1145/​2688500.2688514},​
 + doi = {10.1145/​2688500.2688514},​
 + acmid = {2688514},
 + ​publisher = {ACM},
 + ​address = {New York, NY, USA},
 + ​keywords = {Cilk, cache-oblivious parallel algorithms, cache-oblivious wavefront, dynamic programming,​ multi-core, nested parallel computation},​
 +}
 +
 +
 +@inproceedings{Huang:​2014:​ARD:​2628071.2628089,​
 + ​author = {Huang, Cheng-Chieh and Nagarajan, Vijay},
 + title = {ATCache: Reducing DRAM Cache Latency via a Small SRAM Tag Cache},
 + ​booktitle = {Proceedings of the 23rd International Conference on Parallel Architectures and Compilation},​
 + ​series = {PACT '14},
 + year = {2014},
 + isbn = {978-1-4503-2809-8},​
 + ​location = {Edmonton, AB, Canada},
 + pages = {51--60},
 + ​numpages = {10},
 + url = {http://​doi.acm.org/​10.1145/​2628071.2628089},​
 + doi = {10.1145/​2628071.2628089},​
 + acmid = {2628089},
 + ​publisher = {ACM},
 + ​address = {New York, NY, USA},
 + ​keywords = {design, dram cache, performance},​
 +}
 +
 +@inproceedings{Fatehi:​2014:​ITS:​2628071.2628093,​
 + ​author = {Fatehi, Ehsan and Gratz, Paul},
 + title = {ILP and TLP in Shared Memory Applications:​ A Limit Study},
 + ​booktitle = {Proceedings of the 23rd International Conference on Parallel Architectures and Compilation},​
 + ​series = {PACT '14},
 + year = {2014},
 + isbn = {978-1-4503-2809-8},​
 + ​location = {Edmonton, AB, Canada},
 + pages = {113--126},
 + ​numpages = {14},
 + url = {http://​doi.acm.org/​10.1145/​2628071.2628093},​
 + doi = {10.1145/​2628071.2628093},​
 + acmid = {2628093},
 + ​publisher = {ACM},
 + ​address = {New York, NY, USA},
 + ​keywords = {instruction-level parallelism (ilp), limits, pthreads, thread-level parallelism (tlp)},
 +
 +
 +
 +@inproceedings{Cameron:​2014:​BDP:​2628071.2628079,​
 + ​author = {Cameron, Robert D. and Shermer, Thomas C. and Shriraman, Arrvindh and Herdy, Kenneth S. and Lin, Dan and Hull, Benjamin R. and Lin, Meng},
 + title = {Bitwise Data Parallelism in Regular Expression Matching},
 + ​booktitle = {Proceedings of the 23rd International Conference on Parallel Architectures and Compilation},​
 + ​series = {PACT '14},
 + year = {2014},
 + isbn = {978-1-4503-2809-8},​
 + ​location = {Edmonton, AB, Canada},
 + pages = {139--150},
 + ​numpages = {12},
 + url = {http://​doi.acm.org/​10.1145/​2628071.2628079},​
 + doi = {10.1145/​2628071.2628079},​
 + acmid = {2628079},
 + ​publisher = {ACM},
 + ​address = {New York, NY, USA},
 + ​keywords = {parallel bit streams, regular expression matching},
 +}
 +
 +@inproceedings{Ansel:​2014:​OEF:​2628071.2628092,​
 + ​author = {Ansel, Jason and Kamil, Shoaib and Veeramachaneni,​ Kalyan and Ragan-Kelley,​ Jonathan and Bosboom, Jeffrey and O'​Reilly,​ Una-May and Amarasinghe,​ Saman},
 + title = {OpenTuner: An Extensible Framework for Program Autotuning},​
 + ​booktitle = {Proceedings of the 23rd International Conference on Parallel Architectures and Compilation},​
 + ​series = {PACT '14},
 + year = {2014},
 + isbn = {978-1-4503-2809-8},​
 + ​location = {Edmonton, AB, Canada},
 + pages = {303--316},
 + ​numpages = {14},
 + url = {http://​doi.acm.org/​10.1145/​2628071.2628092},​
 + doi = {10.1145/​2628071.2628092},​
 + acmid = {2628092},
 + ​publisher = {ACM},
 + ​address = {New York, NY, USA},
 + ​keywords = {autotuner, optimization},​
 +}
 +
 +
 +@INPROCEEDINGS{6835968, ​
 + ​author={Hayenga,​ M. and Naresh, V.R.K. and Lipasti, M.H.}, ​
 + ​booktitle={High Performance Computer Architecture (HPCA), 2014 IEEE 20th International Symposium}, ​
 + ​title={Revolver:​ Processor architecture for power efficient loop execution}, ​
 + ​year={2014}, ​
 + ​month={Feb}, ​
 + ​pages={591-602}, ​
 + ​keywords={computer architecture;​energy conservation;​instruction sets;power aware computing;​Revolver architecture;​branch prediction;​dispatch logic;​energy efficiency;​frontend instruction dispatches;​instruction fetch;loop buffers;​loop execution;​loop iterations;​micro-op cache techniques;​out-of-order execution core;​out-of-order processor architecture;​power efficient loop execution;​processor core;​processor frontend;​static instruction instances;​Arrays;​Clocks;​Out of order;​Pipelines;​Rain;​Registers;​Resource management}, ​
 + ​doi={10.1109/​HPCA.2014.6835968},​
 +}
 +
 +
 +@INPROCEEDINGS{5377644, ​
 + ​author={Shafiq,​ M. and Pericas, M. and de la Cruz, R. and Araya-Polo, M. and Navarro, N. and Ayguade, E.}, 
 + ​booktitle={Field-Programmable Technology, 2009. FPT 2009.}, ​
 + ​title={Exploiting memory customization in FPGA for 3D stencil computations}, ​
 + ​year={2009}, ​
 + ​month={Dec}, ​
 + ​pages={38-45}, ​
 + ​keywords={field programmable gate arrays;​signal processing;​3D stencil computations;​FPGA;​IBM PowerXCell 8i;data reuse;​memory customization;​memory organization;​memory-bound kernels;​Bandwidth;​Computer applications;​Field programmable gate arrays;​Finite difference methods;​Finite impulse response filter;​Hardware;​Kernel;​Nearest neighbor searches;​Throughput;​Time domain analysis}, ​
 + ​doi={10.1109/​FPT.2009.5377644},​
 +}
 +
 +
 +@inproceedings{Wahib:​2015:​AGK:​2749246.2749255,​
 + ​author = {Wahib, Mohamed and Maruyama, Naoya},
 + title = {Automated GPU Kernel Transformations in Large-Scale Production Stencil Applications},​
 + ​booktitle = {Proceedings of the 24th International Symposium on High-Performance Parallel and Distributed Computing},
 + ​series = {HPDC '15},
 + year = {2015},
 + isbn = {978-1-4503-3550-8},​
 + ​location = {Portland, Oregon, USA},
 + pages = {259--270},
 + ​numpages = {12},
 + url = {http://​doi.acm.org/​10.1145/​2749246.2749255},​
 + doi = {10.1145/​2749246.2749255},​
 + acmid = {2749255},
 + ​publisher = {ACM},
 + ​address = {New York, NY, USA},
 + ​keywords = {cuda, gpu, source-to-source translation,​ stencil computations},​
 +}
 +
 +
 +@inproceedings{Benson:​2015:​FPP:​2688500.2688513,​
 + ​author = {Benson, Austin R. and Ballard, Grey},
 + title = {A Framework for Practical Parallel Fast Matrix Multiplication},​
 + ​booktitle = {Proceedings of the 20th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},​
 + ​series = {PPoPP 2015},
 + year = {2015},
 + isbn = {978-1-4503-3205-7},​
 + ​location = {San Francisco, CA, USA},
 + pages = {42--53},
 + ​numpages = {12},
 + url = {http://​doi.acm.org/​10.1145/​2688500.2688513},​
 + doi = {10.1145/​2688500.2688513},​
 + acmid = {2688513},
 + ​publisher = {ACM},
 + ​address = {New York, NY, USA},
 + ​keywords = {dense linear algebra, fast matrix multiplication,​ parallel linear algebra, shared memory},
 +}
 +
 +
 +@inproceedings{Sukhija:​2014:​PSR:​2672598.2672904,​
 + ​author = {Sukhija, Nitin and Malone, Brandon and Srivastava, Srishti and Banicescu, Ioana and Ciorba, Florina M.},
 + title = {Portfolio-Based Selection of Robust Dynamic Loop Scheduling Algorithms Using Machine Learning},
 + ​booktitle = {Proceedings of the 2014 IEEE International Parallel \& Distributed Processing Symposium Workshops},
 + ​series = {IPDPSW '14},
 + year = {2014},
 + isbn = {978-1-4799-4116-2},​
 + pages = {1638--1647},​
 + ​numpages = {10},
 + url = {http://​dx.doi.org/​10.1109/​IPDPSW.2014.183},​
 + doi = {10.1109/​IPDPSW.2014.183},​
 + acmid = {2672904},
 + ​publisher = {IEEE Computer Society},
 + ​address = {Washington,​ DC, USA},
 + ​keywords = {Dynamic loop scheduling, robustness, algorithm selection, empirical robustness prediction models, machine learning techniques, variable system availability},​
 +}
 +
 +@incollection{Tiwari2014,​
 +year={2014},​
 +isbn={978-3-319-09872-2},​
 +booktitle={Euro-Par 2014 Parallel Processing},​
 +series={Euro-Par 2014 Parallel Processing},​
 +volume={8632},​
 +editor={Silva,​ Fernando and Dutra, Ins and Santos Costa, Vtor},
 +doi={10.1007/​978-3-319-09873-9_6},​
 +title={Modeling the Impact of Reduced Memory Bandwidth on HPC Applications},​
 +url={http://​dx.doi.org/​10.1007/​978-3-319-09873-9_6},​
 +publisher={Springer International Publishing},​
 +author={Tiwari,​ Ananta and Gamst, Anthony and Laurenzano, MichaelA. and Schulz, Martin and Carrington, Laura},
 +pages={63-74},​
 +language={English}
 +}
 +
 +
 +@INPROCEEDINGS{7056046, ​
 + ​author={Agarwal,​ N. and Nellans, D. and O'​Connor,​ M. and Keckler, S.W. and Wenisch, T.F.}, ​
 + ​booktitle={High Performance Computer Architecture (HPCA), 2015 IEEE 21st International Symposium}, ​
 + ​title={Unlocking bandwidth for GPUs in CC-NUMA systems}, ​
 + ​year={2015}, ​
 + ​month={Feb}, ​
 + ​pages={354-365}, ​
 + ​keywords={cache storage;​graphics processing units;​parallel processing;​storage management;​CC-NUMA GPU-CPU systems;CPU memory bandwidth;​GDDR memory;GPU kernel;GPU memory bandwidth;​GPU relaxed memory semantics;​GPU-based HPC applications;​aggressive memory prefetching;​bandwidth balancing;​hardware cache-coherence;​memory-intensive GPU workloads;​minimal hardware support;​on-demand software page migration;​oracular page placement;​software runtime system;​virtual address-based program locality;​Bandwidth;​Graphics processing units;​Hardware;​Memory management;​Random access memory;​Runtime}, ​
 + ​doi={10.1109/​HPCA.2015.7056046},​
 +}
 +
 +
 +@INPROCEEDINGS{6270616, ​
 + ​author={Changyou Zhang and Kun Huang and Xiang Cui and Yifeng Chen}, ​
 + ​booktitle={Parallel and Distributed Processing Symposium Workshops PhD Forum (IPDPSW), 2012 IEEE 26th International}, ​
 + ​title={Power-aware Programming with GPU Accelerators}, ​
 + ​year={2012}, ​
 + ​month={May}, ​
 + ​pages={2443-2449}, ​
 + ​keywords={graphics processing units;​multi-threading;​multiprocessing systems;​power aware computing;​ubiquitous computing;​GPU accelerators;​high-level program development;​manycore processor;​multithreaded processor;​on-chip parallelism;​parallel processor;​power consumption values;​power efficiency;​power estimation;​power-aware programming;​processor computational power;​processor memory bandwidth;​program statements;​ubiquitous computing;​Bandwidth;​Graphics processing unit;​Hardware;​Memory management;​Message systems;​Power demand;​Power measurement;​GPU;​Power-aware;​Primitive;​Programming}, ​
 + ​doi={10.1109/​IPDPSW.2012.301},​
 +}
 +
 +
 +@inproceedings{Fang:​2014:​TIX:​2568088.2576799,​
 + ​author = {Fang, Jianbin and Sips, Henk and Zhang, LiLun and Xu, Chuanfu and Che, Yonggang and Varbanescu, Ana Lucia},
 + title = {Test-driving Intel Xeon Phi},
 + ​booktitle = {Proceedings of the 5th ACM/SPEC International Conference on Performance Engineering},​
 + ​series = {ICPE '14},
 + year = {2014},
 + isbn = {978-1-4503-2733-6},​
 + ​location = {Dublin, Ireland},
 + pages = {137--148},
 + ​numpages = {12},
 + url = {http://​doi.acm.org/​10.1145/​2568088.2576799},​
 + doi = {10.1145/​2568088.2576799},​
 + acmid = {2576799},
 + ​publisher = {ACM},
 + ​address = {New York, NY, USA},
 + ​keywords = {experience with xeon phi, microbenchmarking,​ optimization,​ performance analysis},
 +
 +
 +                                              ​
 +@inproceedings{Ravishankar:​2015:​DMC:​2688500.2688515,​
 + ​author = {Ravishankar,​ Mahesh and Dathathri, Roshan and Elango, Venmugil and Pouchet, Louis-Noël and Ramanujam, J. and Rountev, Atanas and Sadayappan, P.},
 + title = {Distributed Memory Code Generation for Mixed Irregular/​Regular Computations},​
 + ​booktitle = {Proceedings of the 20th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},​
 + ​series = {PPoPP 2015},
 + year = {2015},
 + isbn = {978-1-4503-3205-7},​
 + ​location = {San Francisco, CA, USA},
 + pages = {65--75},
 + ​numpages = {11},
 + url = {http://​doi.acm.org/​10.1145/​2688500.2688515},​
 + doi = {10.1145/​2688500.2688515},​
 + acmid = {2688515},
 + ​publisher = {ACM},
 + ​address = {New York, NY, USA},
 + ​keywords = {Distributed Memory, Inspector/​Executor,​ Irregular Computation,​ Polyhedral Compilation},​
 +}
 +
 +
 +@inproceedings{Cong:​2014:​OMS:​2593069.2593090,​
 + ​author = {Cong, Jason and Li, Peng and Xiao, Bingjun and Zhang, Peng},
 + title = {An Optimal Microarchitecture for Stencil Computation Acceleration Based on Non-Uniform Partitioning of Data Reuse Buffers},
 + ​booktitle = {Proceedings of the 51st Annual Design Automation Conference},​
 + ​series = {DAC '14},
 + year = {2014},
 + isbn = {978-1-4503-2730-5},​
 + ​location = {San Francisco, CA, USA},
 + pages = {77:​1--77:​6},​
 + ​articleno = {77},
 + ​numpages = {6},
 + url = {http://​doi.acm.org/​10.1145/​2593069.2593090},​
 + doi = {10.1145/​2593069.2593090},​
 + acmid = {2593090},
 + ​publisher = {ACM},
 + ​address = {New York, NY, USA},
 +}
 +
 +@ARTICLE{6470606, ​
 + ​author={Sano,​ K. and Hatsuda, Y. and Yamamoto, S.}, 
 + ​journal={Parallel and Distributed Systems, IEEE Transactions}, ​
 + ​title={Multi-FPGA Accelerator for Scalable Stencil Computation with Constant Memory Bandwidth}, ​
 + ​year={2014}, ​
 + ​month={March}, ​
 + ​volume={25}, ​
 + ​number={3}, ​
 + ​pages={695-705}, ​
 + ​keywords={field programmable gate arrays;​parallel processing;​storage management;​CCM;​GPU;​Jacobi computation;​SSA;​custom computing machine;​deep pipelining approach;​domain-specific programmable concept;​field programmable gate array;​graphics processing unit;​high-performance stencil computations;​memory bandwidth;​multiFPGA accelerator;​multicore microprocessors;​scalable stencil computation;​scalable streaming-array;​scientific computations;​Arrays;​Bandwidth;​Computational modeling;​Field programmable gate arrays;​Hardware;​Scalability;​FPGA;​Scalable streaming-array;​custom computing machine;​high-performance computation;​stencil computation}, ​
 + ​doi={10.1109/​TPDS.2013.51}, ​
 + ​ISSN={1045-9219},​
 +}
 +
 +
 +
 +@inproceedings{Fan:​2006:​IHE:​1176254.1176322,​
 + ​author = {Fan, Kevin and Kudlur, Manjunath and Park, Hyunchul and Mahlke,
 +Scott},
 + title = {Increasing Hardware Efficiency with Multifunction Loop
 +Accelerators},​
 + ​booktitle = {Proceedings of the 4th International Conference on
 +Hardware/​Software Codesign and System Synthesis},
 + ​series = {CODES+ISSS '06},
 + year = {2006},
 + isbn = {1-59593-370-0},​
 + ​location = {Seoul, Korea},
 + pages = {276--281},
 + ​numpages = {6},
 + url = {http://​doi.acm.org/​10.1145/​1176254.1176322},​
 + doi = {10.1145/​1176254.1176322},​
 + acmid = {1176322},
 + ​publisher = {ACM},
 + ​address = {New York, NY, USA},
 + ​keywords = {application-specific hardware, high-level synthesis, loop
 +accelerator,​ modulo scheduling, multifunction design},
 +}
 +
 +@article{Meswani:​2013:​MPP:​2493921.2493922,​
 + ​author = {Meswani, Mitesh R. and Carrington, Laura and Unat, Didem and
 +Snavely, Allan and Baden, Scott and Poole, Stephen},
 + title = {Modeling and Predicting Performance of High Performance Computing
 +Applications on Hardware Accelerators},​
 + ​journal = {Int. J. High Perform. Comput. Appl.},
 + ​issue_date = {May       ​2013},​
 + ​volume = {27},
 + ​number = {2},
 + month = may,
 + year = {2013},
 + issn = {1094-3420},​
 + pages = {89--108},
 + ​numpages = {20},
 + url = {http://​dx.doi.org/​10.1177/​1094342012468180},​
 + doi = {10.1177/​1094342012468180},​
 + acmid = {2493922},
 + ​publisher = {Sage Publications,​ Inc.},
 + ​address = {Thousand Oaks, CA, USA},
 + ​keywords = {FPGA, GPU, HPC, accelerators,​ benchmarking,​ idioms, performance
 +modeling, performance prediction},​
 +}
 +
 +
 +@article{Nery:​2013:​HRM:​2537182.2537569,​
 + ​author = {Nery, Alexandre S. and Jozwiak, Lech and Lindwer, Menno and Cocco, Mauro and Nedjah, Nadia and Franca, Felipe M. G.},
 + title = {Hardware Reuse in Modern Application-specific Processors and
 +Accelerators},​
 + ​journal = {Microprocess. Microsyst.},​
 + ​issue_date = {August, 2013},
 + ​volume = {37},
 + ​number = {6-7},
 + month = aug,
 + year = {2013},
 + issn = {0141-9331},​
 + pages = {684--692},
 + ​numpages = {9},
 + url = {http://​dx.doi.org/​10.1016/​j.micpro.2012.06.005},​
 + doi = {10.1016/​j.micpro.2012.06.005},​
 + acmid = {2537569},
 + ​publisher = {Elsevier Science Publishers B. V.},
 + ​address = {Amsterdam, The Netherlands,​ The Netherlands},​
 + ​keywords = {Application-specific processors, Area reduction, Hardware
 +accelerator,​ Power reduction, Resource sharing},
 +}
 +
 + 
 +
 +@inproceedings{Bandishti:​2012:​TSC:​2388996.2389051,​
 + ​author = {Bandishti, Vinayaka and Pananilath, Irshad and Bondhugula, Uday},
 + title = {Tiling Stencil Computations to Maximize Parallelism},​
 + ​booktitle = {Proceedings of the International Conference on High Performance
 +Computing, Networking, Storage and Analysis},
 + ​series = {SC '12},
 + year = {2012},
 + isbn = {978-1-4673-0804-5},​
 + ​location = {Salt Lake City, Utah},
 + pages = {40:​1--40:​11},​
 + ​articleno = {40},
 + ​numpages = {11},
 + url = {http://​dl.acm.org/​citation.cfm?​id=2388996.2389051},​
 + acmid = {2389051},
 + ​publisher = {IEEE Computer Society Press},
 + ​address = {Los Alamitos, CA, USA},
 + ​keywords = {compilers, program transformation},​
 +}
 +
 +
 +@inproceedings{Wonnacott13,​
 +    Author = {Dave G. Wonnacott and Michelle Mills Strout},
 +    Booktitle = {Proceedings of the 3rd International Workshop on Polyhedral Compilation Techniques (IMPACT)}, ​
 + series = {IMPACT 2013},
 +    Month = {January},
 +    Title = {On the Scalability of Loop Tiling Techniques},​
 +    Year = {2013},
 +    url={http://​impact.gforge.inria.fr/​impact2013/​papers/​impact2013_on_the_scalability_of_loop_tiling_techniques.pdf}
 +}
 +
 +
 +@inproceedings{Pugh:​1991:​OTF:​125826.125848,​
 + ​author = {Pugh, William},
 + title = {The Omega Test: A Fast and Practical Integer Programming Algorithm
 +for Dependence Analysis},
 + ​booktitle = {Proceedings of the 1991 ACM/IEEE Conference on Supercomputing},​
 + ​series = {Supercomputing '91},
 + year = {1991},
 + isbn = {0-89791-459-7},​
 + ​location = {Albuquerque,​ New Mexico, USA},
 + pages = {4--13},
 + ​numpages = {10},
 + url = {http://​doi.acm.org/​10.1145/​125826.125848},​
 + doi = {10.1145/​125826.125848},​
 + acmid = {125848},
 + ​publisher = {ACM},
 + ​address = {New York, NY, USA},
 +}
 +
 +
 +@inproceedings{cohenautomatic,​
 + title={Automatic Intra-Array Storage Optimization},​
 + author={Bhaskaracharya,​ Somashekaracharya G and Bondhugula, Uday and Cohen, Albert },
 + url={http://​www.csa.iisc.ernet.in/​TR/​2014/​3/​paper.pdf},​
 + series={IISc-CSA-TR-2014-3,​ Nov 2014 and submitted to ACM TOPLAS, Feb 2015},
 + year={2015},​
 +    }
 +
 +@inproceedings{Stock:​2014:​FED:​2594291.2594342,​
 + ​author = {Stock, Kevin and Kong, Martin and Grosser, Tobias and Pouchet,
 +Louis-Noël and Rastello, Fabrice and Ramanujam, J. and Sadayappan, P.},
 + title = {A Framework for Enhancing Data Reuse via Associative Reordering},​
 + ​booktitle = {Proceedings of the 35th ACM SIGPLAN Conference on Programming
 +Language Design and Implementation},​
 + ​series = {PLDI '14},
 + year = {2014},
 + isbn = {978-1-4503-2784-8},​
 + ​location = {Edinburgh, United Kingdom},
 + pages = {65--76},
 + ​numpages = {12},
 + url = {http://​doi.acm.org/​10.1145/​2594291.2594342},​
 + doi = {10.1145/​2594291.2594342},​
 + acmid = {2594342},
 + ​publisher = {ACM},
 + ​address = {New York, NY, USA},
 +}
 +
 +@article{Cilardo:​2015:​IMM:​2695583.2675359,​
 + ​author = {Cilardo, Alessandro and Gallo, Luca},
 + title = {Improving Multibank Memory Access Parallelism with Lattice-Based
 +Partitioning},​
 + ​journal = {ACM Trans. Archit. Code Optim.},
 + ​issue_date = {January 2015},
 + ​volume = {11},
 + ​number = {4},
 + month = jan,
 + year = {2015},
 + issn = {1544-3566},​
 + pages = {45:​1--45:​25},​
 + ​articleno = {45},
 + ​numpages = {25},
 + url = {http://​doi.acm.org/​10.1145/​2675359},​
 + doi = {10.1145/​2675359},​
 + acmid = {2675359},
 + ​publisher = {ACM},
 + ​address = {New York, NY, USA},
 + ​keywords = {Memory partitioning,​ field-programmable gate arrays, fine-grained
 +distributed shared memory, polyhedral model},
 +}
  
melange/papers/fall2015.txt · Last modified: 2015/11/10 09:40 (external edit)