User Tools

Site Tools


melange:papers:spring2018

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
Next revision Both sides next revision
melange:papers:spring2018 [2018/02/21 11:36]
prerana
melange:papers:spring2018 [2018/02/21 17:17]
sanjay
Line 1: Line 1:
 @article{Bielecki:​2016:​TAN:​3060371.3060383,​ @article{Bielecki:​2016:​TAN:​3060371.3060383,​
- ​author = {Bielecki, ​W\lodzimierz ​and Pa\lkowski, Marek},+ ​author = {Bielecki, ​Wlodzimierz ​and Pa\lkowski, Marek},
  title = {Tiling Arbitrarily Nested Loops by Means of the Transitive},​  title = {Tiling Arbitrarily Nested Loops by Means of the Transitive},​
  ​journal = {Int. J. Appl. Math. Comput. Sci.},  ​journal = {Int. J. Appl. Math. Comput. Sci.},
Line 48: Line 48:
 number="​6",​ number="​6",​
 pages="​607--631",​ pages="​607--631",​
-abstract="​There are many algorithms for the space-time mapping of nested loops. Some of them even make the optimal choices within their framework. We propose a preprocessing phase for algorithms in the polytope model, which extends the model and yields space-time mappings whose schedule is, in some cases, orders of magnitude faster. These are cases in which the dependence graph has small irregularities. The basic idea is to split the index set of the loop nests into parts with a regular dependence structure and apply the existing space-time mapping algorithms to these parts individually. This work is based on a seminal idea in the more limited context of loop parallelization at the code level. We elevate the idea to the model level (our model is the polytope model), which increases its applicability by providing a clearer and wider range of choices at an acceptable analysis cost. Index set splitting is one facet in the effort to extend the power of the polytope model and to enable the generation of competitive target code.",​ 
-issn="​1573-7640",​ 
 doi="​10.1023/​A:​1007516818651",​ doi="​10.1023/​A:​1007516818651",​
 url="​https://​doi.org/​10.1023/​A:​1007516818651"​ url="​https://​doi.org/​10.1023/​A:​1007516818651"​
Line 79: Line 77:
 number={1}, ​ number={1}, ​
 pages={127-138}, ​ pages={127-138}, ​
-keywords={DRAM chips;data flow computing;​energy conservation;​feedforward neural nets;​learning (artificial intelligence);​neural net architecture;​power aware computing;​reconfigurable architectures;​AI systems;​AlexNet;​CNN shapes;DRAM accesses;​Eyeriss;​MAC;​RS dataflow reconfiguration;​accelerator chip;​convolutional layers;data movement energy cost;​dataflow processing;​deep convolutional neural networks;​energy efficiency;​energy-efficient reconfigurable accelerator;​multiply and accumulation;​off-chip DRAM;​reconfiguring architecture;​row stationary;​spatial architecture;​Clocks;​Computer architecture;​Hardware;​Neural networks;​Random access memory;​Shape;​Throughput;​Convolutional neural networks (CNNs);​dataflow processing;​deep learning;​energy-efficient accelerators;​spatial architecture},​ 
 url = {http://​ieeexplore.ieee.org/​document/​7738524/​}, ​ url = {http://​ieeexplore.ieee.org/​document/​7738524/​}, ​
 doi={10.1109/​JSSC.2016.2616357}, ​ doi={10.1109/​JSSC.2016.2616357}, ​
Line 102: Line 99:
 } }
  
-BibTeX | EndNote | ACM Ref 
 @article{Feautrier:​2006:​SSS:​1217445.1217447,​ @article{Feautrier:​2006:​SSS:​1217445.1217447,​
  ​author = {Feautrier, Paul},  ​author = {Feautrier, Paul},
Line 187: Line 183:
 number="​2",​ number="​2",​
 pages="​95--113",​ pages="​95--113",​
-abstract="​The parallelization of many algorithms can be obtained using space-time transformations which are applied on nested do-loops or on recurrence equations. In this paper, we analyze systems of linear recurrence equations, a generalization of uniform recurrence equations. The first part of the paper describes a method for finding automatically whether such a system can be scheduled by an affine timing function, independent of the size parameter of the algorithm. In the second part, we describe a powerful method that makes it possible to transform linear recurrences into uniform recurrence equations. Both parts rely on results on integral convex polyhedra. Our results are illustrated on the Gauss elimination algorithm and on the Gauss-Jordan diagonalization algorithm.",​ 
 issn="​0922-5773",​ issn="​0922-5773",​
 doi="​10.1007/​BF02477176",​ doi="​10.1007/​BF02477176",​
Line 194: Line 189:
  
 @Article{feautrier91,​ @Article{feautrier91,​
-author= ​        ​"​Feautrier,​ P.",+author= "​Feautrier,​ P.",
 title= ​         "​Dataflow analysis of array and scalar references",​ title= ​         "​Dataflow analysis of array and scalar references",​
 journal= ​       "​International Journal of Parallel Programming",​ journal= ​       "​International Journal of Parallel Programming",​
Line 203: Line 198:
 month= ​         "​Feb",​ month= ​         "​Feb",​
 url=            "​http://​www.cs.colostate.edu/​~cs560/​Spring2011/​Notes/​FeautrierEDFAijpp91.pdf"​ url=            "​http://​www.cs.colostate.edu/​~cs560/​Spring2011/​Notes/​FeautrierEDFAijpp91.pdf"​
-annote= ​        "​This article explains how a simple imperative language 
-        program (consisting only of assignments,​ for loops with affine loop 
-        limits, and arrays with affine index expressions),​ can be statically 
-        analyzed to find the flow dependencies."​ 
 } }
  
 @article{feautrier92a,​ @article{feautrier92a,​
-  ​author ​   = {Paul Feautrier},​ +author ​   = {Paul Feautrier},​ 
-  title     = {Some Efficient Solutions to the Affine Scheduling Problem+title     = {Some Efficient Solutions to the Affine Scheduling Problem {Part I}.  One-dimensional Time}, 
-              ​{Part I}.  One-dimensional Time}, +journal ​  = {International Journal of Parallel Programming},​ 
-  journal ​  = {International Journal of Parallel Programming},​ +volume ​   = {21}, 
-  volume ​   = {21}, +number ​   = {5}, 
-  number ​   = {5}, +year      = {1992}, 
-  year      = {1992}, +pages     = {313-347},​ 
-  pages     = {313-347},​ +url        = {http://​dx.doi.org/​10.1007/​BF01407835}
-  url        = {http://​dx.doi.org/​10.1007/​BF01407835}+
 } }
  
 @article{feautrier92b,​ @article{feautrier92b,​
-  ​author ​   = {Paul Feautrier},​ +author ​   = {Paul Feautrier},​ 
-  title     = {Some Efficient Solutions to the Affine Scheduling Problem+title     = {Some Efficient Solutions to the Affine Scheduling Problem {Part II}. Multidimensional Time}, 
-               {Part II}. Multidimensional Time}, +journal ​  = {International Journal of Parallel Programming},​ 
-  journal ​  = {International Journal of Parallel Programming},​ +volume ​   = {21}, 
-  volume ​   = {21}, +number ​   = {6}, 
-  number ​   = {6}, +year      = {1992}, 
-  year      = {1992}, +pages     = {389-420},​ 
-  pages     = {389-420},​ +url        = {http://​dx.doi.org/​10.1007/​BF01379404}
-  url        = {http://​dx.doi.org/​10.1007/​BF01379404}+
 } }
  
Line 418: Line 407:
  
 @article{cummins2017synthesizing,​ @article{cummins2017synthesizing,​
- 
    ​title={Synthesizing benchmarks for predictive modeling},    ​title={Synthesizing benchmarks for predictive modeling},
- 
    ​author={Cummins,​ Chris and Petoumenos, Pavlos and Wang, Zheng and Leather, Hugh},    ​author={Cummins,​ Chris and Petoumenos, Pavlos and Wang, Zheng and Leather, Hugh},
- 
    ​year={2017},​    ​year={2017},​
-    
    ​url={http://​homepages.inf.ed.ac.uk/​hleather/​publications/​2017-benchsynth-cgo.pdf}    ​url={http://​homepages.inf.ed.ac.uk/​hleather/​publications/​2017-benchsynth-cgo.pdf}
- 
 } }
  
 @article{optimistic2017,​ @article{optimistic2017,​
- 
    ​title={Optimistic Loop Optimization},​    ​title={Optimistic Loop Optimization},​
- 
    ​author={Doerfert,​ Johannes and Grosser, Tobias and Hack, Sebastian},    ​author={Doerfert,​ Johannes and Grosser, Tobias and Hack, Sebastian},
    url = {http://​dl.acm.org/​citation.cfm?​id=3049832.3049864},​    url = {http://​dl.acm.org/​citation.cfm?​id=3049832.3049864},​
    ​year={2017}    ​year={2017}
- 
 } }
  
Line 483: Line 464:
 month = {February}, month = {February},
 url = {https://​www.microsoft.com/​en-us/​research/​wp-content/​uploads/​2016/​02/​CNN20Whitepaper.pdf},​ url = {https://​www.microsoft.com/​en-us/​research/​wp-content/​uploads/​2016/​02/​CNN20Whitepaper.pdf},​
-abstract = { 
- 
-We describe the design of a convolutional neural network accelerator running on a Stratix V FPGA. The design runs at three times the throughput of previous FPGA CNN accelerator designs. We show that the throughput/​watt is significantly higher than for a GPU, and project the performance when ported to an Arria 10 FPGA. 
- 
- 
-}, 
 publisher = {Microsoft Research}, publisher = {Microsoft Research},
 url = {https://​www.microsoft.com/​en-us/​research/​publication/​accelerating-deep-convolutional-neural-networks-using-specialized-hardware/​},​ url = {https://​www.microsoft.com/​en-us/​research/​publication/​accelerating-deep-convolutional-neural-networks-using-specialized-hardware/​},​
melange/papers/spring2018.txt · Last modified: 2018/04/18 10:30 by prerana