@inproceedings{ASPDAC-2024,author={Ali, Asmer Hamid and Zhang, Fan and Yang, Li and Fan, Deliang},booktitle={2024 29th Asia and South Pacific Design Automation Conference (ASP-DAC)},title={Learning to Prune and Low-Rank Adaptation for Compact Language Model Deployment},year={2024},volume={},number={},}
JSSC
A 65nm RRAM Compute-in-Memory Macro for Genome Processing
Fan Zhang, Amitesh Sridharan, Wangxin He, and 7 more authors
In IEEE Journal of Solid-State Circuits (JSSC), 2024
@article{JSSC_24,author={Zhang, Fan and Sridharan, Amitesh and He, Wangxin and Yeo, Injune and Liehr, Maximilian and Zhang, Wei and Cady, Nathaniel and Cao, Yu and Seo, Jae-sun and Fan, Deliang},journal={IEEE Journal of Solid-State Circuits (JSSC)},title={A 65nm RRAM Compute-in-Memory Macro for Genome Processing},year={2024},volume={},number={},doi={},}
@inproceedings{DAC_24_1,author={Zhang, Fan and Sridharan, Amitesh and Tsai, Wilman and Chen, Yiran and Wang, Shan X. and Fan, Deliang},booktitle={2024 61st ACM/IEEE Design Automation Conference (DAC)},title={Efficient Memory Integration: MRAM-SRAM Hybrid Accelerator for Sparse On-Device Learning},year={2024},volume={},number={},pages={1-6},doi={},}
@inproceedings{DAC_24_2,author={Zhang, Fan and Yang, Li and Fan, Deliang},booktitle={2024 61st ACM/IEEE Design Automation Conference (DAC)},title={Hyb-Learn: A Framework for On-Device Self-Supervised Continual Learning with Hybrid RRAM/SRAM Memory},year={2024},volume={},number={},pages={1-6},doi={},}
TCAD
On-Device Continual Learning with STT-Assisted-SOT MRAM based In-Memory Computing
Fan Zhang, Amitesh Sridharan, Wilman Tsai, and 2 more authors
In IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems, 2024
@article{TCAD_1,author={Zhang, Fan and Sridharan, Amitesh and Tsai, Wilman and Wang, Shan X. and Fan, Deliang},journal={IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},title={On-Device Continual Learning with STT-Assisted-SOT MRAM based In-Memory Computing},year={2024},volume={},number={},pages={1-6},doi={},}
CICC
SP-IMC: A Sparsity Aware In-Memory-Computing Macro in 28nm CMOS with Configurable Sparse Representation for Highly Sparse DNN Workloads
Amitesh Sridharan, Fan Zhang, Jae-sun Seo, and 1 more author
In 2024 IEEE Custom Integrated Circuits Conference (CICC), 2024
@inproceedings{CICC,author={Sridharan, Amitesh and Zhang, Fan and Seo, Jae-sun and Fan, Deliang},booktitle={2024 IEEE Custom Integrated Circuits Conference (CICC)},title={SP-IMC: A Sparsity Aware In-Memory-Computing Macro in 28nm CMOS with Configurable Sparse Representation for Highly Sparse DNN Workloads},year={2024},volume={},number={},pages={1-6},doi={},}
SSCL
Amitesh Sridharan, Jyotishman Saikia, Anupreetham, and 3 more authors
@article{SSCL,author={Sridharan, Amitesh and Saikia, Jyotishman and Anupreetham and Zhang, Fan and Seo, Jae-sun and Fan, Deliang},journal={IEEE Solid-State Circuits Letters},title={},year={2024},volume={},number={},pages={1-6},doi={},}
@inproceedings{10247829,author={Sridharan, Amitesh and Zhang, Fan and Sui, Yang and Yuan, Bo and Fan, Deliang},booktitle={2023 60th ACM/IEEE Design Automation Conference (DAC)},title={DSPIMM: A Fully Digital SParse In-Memory Matrix Vector Multiplier for Communication Applications},year={2023},volume={},number={},pages={1-6},doi={10.1109/DAC56929.2023.10247829},}
@article{9968274,author={Hwang, William and Xue, Fen and Zhang, Fan and Song, Ming-Yuan and Lee, Chien-Min and Turgut, Emrah and Chen, T. C. and Bao, Xinyu and Tsai, Wilman and Fan, Deliang and Wang, Shan X.},journal={IEEE Transactions on Magnetics},title={Energy Efficient Computing With High-Density, Field-Free STT-Assisted SOT-MRAM (SAS-MRAM)},year={2023},volume={59},number={3},pages={1-6},doi={10.1109/TMAG.2022.3224729},}
@article{10034770,author={Zhang, Fan and Angizi, Shaahin and Sun, Jiao and Zhang, Wei and Fan, Deliang},journal={IEEE Journal on Emerging and Selected Topics in Circuits and Systems},title={Aligner-D: Leveraging In-DRAM Computing to Accelerate DNA Short Read Alignment},year={2023},volume={13},number={1},pages={332-343},doi={10.1109/JETCAS.2023.3241545},}
@misc{yang2023efficient,title={Efficient Self-supervised Continual Learning with Progressive Task-correlated Layer Freezing},author={Yang, Li and Lin, Sen and Zhang, Fan and Zhang, Junshan and Fan, Deliang},year={2023},eprint={2303.07477},archiveprefix={arXiv},primaryclass={cs.CV},}
@inproceedings{9712508,author={Zhang, Fan and Yang, Li and Meng, Jian and Cao, Yu Kevin and Seo, Jae-sun and Fan, Deliang},booktitle={2022 27th Asia and South Pacific Design Automation Conference (ASP-DAC)},title={XBM: A Crossbar Column-wise Binary Mask Learning Method for Efficient Multiple Task Adaption},year={2022},volume={},number={},pages={610-615},doi={10.1109/ASP-DAC52403.2022.9712508},}
@inproceedings{9774660,author={Zhang, Fan and Yang, Li and Meng, Jian and Seo, Jae-Sun and Cao, Yu and Fan, Deliang},booktitle={2022 Design, Automation & Test in Europe Conference & Exhibition (DATE)},title={XST: A Crossbar Column-wise Sparse Training for Efficient Continual Learning},year={2022},volume={},number={},pages={48-51},doi={10.23919/DATE54114.2022.9774660},}
@inproceedings{9911440,author={Sridharan, Amitesh and Angizi, Shaahin and Cherupally, Sai Kiran and Zhang, Fan and Seo, Jae-Sun and Fan, Deliang},booktitle={ESSCIRC 2022 - IEEE 48th European Solid State Circuits Conference (ESSCIRC)},title={A 1.23-GHz 16-kb Programmable and Generic Processing-in-SRAM Accelerator in 65nm},year={2022},volume={},number={},pages={153-156},doi={10.1109/ESSCIRC55480.2022.9911440},}
The ReRAM crossbar array, as a highly parallel, fast, and energy-efficient structure, has attracted much attention, especially for accelerating Deep Neural Network (DNN) inference on a single specific task. However, due to the high energy consumption of weight re-programming and the ReRAM cells’ low endurance problem, adapting the crossbar array to multiple tasks has not been well explored. In this paper, we propose XMA, a novel crossbar-aware shift-based mask learning method for multiple-task adaption in ReRAM crossbar DNN accelerators, for the first time. XMA leverages the benefit of popular mask-based learning algorithms to mitigate catastrophic forgetting, and learns a task-specific, crossbar column-wise, shift-based multi-level mask, rather than the commonly used element-wise binary mask, for each new task on top of a frozen backbone model. With our crossbar-aware design innovation, the masking operation required to adapt to a new task can be implemented in an existing crossbar-based convolution engine with minimal hardware/memory overhead and, more importantly, without power-hungry cell re-programming, unlike prior works. Extensive experimental results show that, compared with the state-of-the-art multiple-task adaption method Piggyback [1], XMA achieves 3.19% higher accuracy on average while saving 96.6% memory overhead. Moreover, by eliminating cell re-programming, XMA achieves 4.3x higher energy efficiency than Piggyback.
@inproceedings{10.1145/3489517.3530458,author={Zhang, Fan and Yang, Li and Meng, Jian and Seo, Jae-sun and Cao, Yu (Kevin) and Fan, Deliang},title={XMA: A Crossbar-Aware Multi-Task Adaption Framework via Shift-Based Mask Learning Method},year={2022},isbn={9781450391429},publisher={Association for Computing Machinery},address={New York, NY, USA},url={https://doi.org/10.1145/3489517.3530458},doi={10.1145/3489517.3530458},booktitle={Proceedings of the 59th ACM/IEEE Design Automation Conference},pages={271–276},numpages={6},keywords={neural networks, in-memory computing, multi-task learning},location={San Francisco, California},series={DAC '22},}
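The core trick above is that a column-wise, shift-based mask can re-weight crossbar partial sums without touching the stored cells. A minimal sketch of the idea, with hypothetical shapes and mask levels (illustrative only, not the paper's implementation):

```python
import numpy as np

# Minimal sketch of a column-wise, shift-based multi-level mask
# (hypothetical shapes and mask levels, not the paper's implementation).
# Each crossbar column gets one small integer "level"; its partial sum is
# scaled by a power of two (a bit shift), so adapting to a new task never
# re-programs the frozen cells.

rng = np.random.default_rng(0)
weights = rng.standard_normal((64, 32))  # frozen backbone: 64 rows x 32 columns
x = rng.standard_normal(64)              # input activations driving the rows

col_mask = rng.integers(0, 4, size=32)   # learned per-column mask level in {0..3}

ideal = x @ weights                      # per-column partial sums (ideal crossbar VMM)
adapted = ideal * np.exp2(-col_mask.astype(float))  # shift-based per-column scaling

gated = ideal * (col_mask > 0)           # binary special case: a level can gate a column off
print(adapted.shape, gated.shape)        # (32,) (32,)
```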
GLSVLSI
MnM: A Fast and Efficient Min/Max Searching in MRAM
Amitesh Sridharan, Fan Zhang, and Deliang Fan
In Proceedings of the Great Lakes Symposium on VLSI 2022, Irvine, CA, USA, 2022
In-Memory Computing (IMC) technology is considered a promising approach to solving the well-known memory-wall challenge in data-intensive applications. In this paper, we are the first to propose MnM, a novel IMC system with innovative architecture/circuit designs for fast and efficient Min/Max searching computation in emerging Spin-Orbit Torque Magnetic Random Access Memory (SOT-MRAM). Our proposed SOT-MRAM-based in-memory logic circuits are specially optimized to perform the parallel, one-cycle XNOR logic that is heavily used in the Min/Max searching-in-memory algorithm. Our novel in-memory XNOR circuit has an overhead of just two transistors per row, compared to most prior methodologies, which typically use multiple sense amplifiers or complex CMOS logic gates. We also design all the other peripheral circuits required to implement complete Min/Max searching-in-MRAM computation. Our cross-layer, comprehensive experiments on Dijkstra’s algorithm and other sorting algorithms on real-world datasets show that MnM achieves significant performance improvement over CPUs, GPUs, and other competing IMC platforms based on RRAM/MRAM/DRAM.
@inproceedings{10.1145/3526241.3530349,author={Sridharan, Amitesh and Zhang, Fan and Fan, Deliang},title={MnM: A Fast and Efficient Min/Max Searching in MRAM},year={2022},isbn={9781450393225},publisher={Association for Computing Machinery},address={New York, NY, USA},url={https://doi.org/10.1145/3526241.3530349},doi={10.1145/3526241.3530349},booktitle={Proceedings of the Great Lakes Symposium on VLSI 2022},pages={39–44},numpages={6},keywords={SOT-MRAM, in-memory-computing, Min/Max},location={Irvine, CA, USA},series={GLSVLSI '22},}
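The searching algorithm itself is easy to emulate in software: scan bit-planes from MSB to LSB and keep only the candidates whose current bit matches the search direction; checking whether a stored bit equals the reference bit is exactly the XNOR the in-memory circuit evaluates in one cycle across all rows. A minimal sketch, with an assumed 8-bit width and assumed names (not the paper's RTL):

```python
import numpy as np

# Software emulation of a bit-serial max search, the kind of loop the
# in-memory XNOR primitive accelerates (illustrative only).

def max_search(values, nbits=8):
    """Find indices of the maximum by scanning bit-planes MSB -> LSB."""
    candidates = np.ones(len(values), dtype=bool)   # all rows start as candidates
    for b in range(nbits - 1, -1, -1):
        bit = (values >> b) & 1                     # one bit-plane, read in parallel in-memory
        hit = candidates & (bit == 1)               # candidates whose current bit is 1
        if hit.any():                               # if any candidate has a 1, drop the 0s
            candidates = hit
    return np.flatnonzero(candidates)               # indices holding the max value

vals = np.array([23, 200, 7, 200, 91], dtype=np.uint8)
print(max_search(vals))                             # -> [1 3]
```

A min search is the mirror image: keep candidates whose current bit is 0 whenever any candidate has one.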
IEEE Asilomar
Efficient Multi-task Adaption for Crossbar-based In-Memory Computing
Fan Zhang, Li Yang, and Deliang Fan
In 2022 56th Asilomar Conference on Signals, Systems, and Computers, 2022
@inproceedings{10052040,author={Zhang, Fan and Yang, Li and Fan, Deliang},booktitle={2022 56th Asilomar Conference on Signals, Systems, and Computers},title={Efficient Multi-task Adaption for Crossbar-based In-Memory Computing},year={2022},volume={},number={},pages={328-333},doi={10.1109/IEEECONF56349.2022.10052040},}
Front. in Elec.
XMA2: A crossbar-aware multi-task adaption framework via 2-tier masks
Recently, ReRAM crossbar-based deep neural network (DNN) accelerators have been widely investigated. However, most prior works focus on single-task inference due to the high energy consumption of weight reprogramming and ReRAM cells’ low-endurance issue. Adapting a ReRAM crossbar-based DNN accelerator to multiple tasks has not been fully explored. In this study, we propose XMA2, a novel crossbar-aware learning method with a 2-tier masking technique to efficiently adapt a DNN backbone model deployed in a ReRAM crossbar for new task learning. During XMA2-based multi-task adaption (MTA), the tier-1 ReRAM crossbar-based processing-element-(PE-)wise mask is first learned to identify the most critical PEs to be reprogrammed for the essential new features of the new task. Subsequently, the tier-2 crossbar column-wise mask is applied within the remaining weight-frozen PEs to learn a hardware-friendly, column-wise scaling factor for new task learning without modifying the weight values. With these crossbar-aware design innovations, the required masking operation can be implemented in an existing crossbar-based convolution engine with minimal hardware/memory overhead to adapt to a new task. Extensive experimental results show that, compared with other state-of-the-art multi-task adaption methods, XMA2 achieves the highest accuracy on all popular multi-task learning datasets.
@article{10.3389/felec.2022.1032485,author={Zhang, Fan and Yang, Li and Meng, Jian and Seo, Jae-sun and Cao, Yu and Fan, Deliang},title={XMA2: A crossbar-aware multi-task adaption framework via 2-tier masks},journal={Frontiers in Electronics},volume={3},year={2022},url={https://www.frontiersin.org/articles/10.3389/felec.2022.1032485},doi={10.3389/felec.2022.1032485},issn={2673-5857},}
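A toy sketch of the 2-tier masking flow described in the abstract above, with hypothetical tile sizes (not the authors' training code): tier 1 picks the few PEs whose weights are reprogrammed, tier 2 scales the columns of the frozen PEs.

```python
import numpy as np

# Toy sketch of 2-tier masking (hypothetical shapes). The weight matrix is
# tiled into PEs; tier 1 marks a few PEs as reprogrammable, tier 2 gives
# every column of the remaining frozen PEs a learned scaling factor.

rng = np.random.default_rng(1)
W = rng.standard_normal((64, 64))            # backbone weights, tiled into 4x4 PEs of 16x16
pe = 16

pe_mask = rng.random((4, 4)) < 0.2           # tier 1: ~20% of PEs selected for reprogramming
col_scale = rng.standard_normal((4, 4, pe))  # tier 2: per-column scale inside frozen PEs

W_new = W.copy()
for i in range(4):
    for j in range(4):
        blk = np.s_[i*pe:(i+1)*pe, j*pe:(j+1)*pe]
        if pe_mask[i, j]:
            W_new[blk] = rng.standard_normal((pe, pe))   # stand-in for reprogrammed weights
        else:
            W_new[blk] = W[blk] * col_scale[i, j]        # column-wise scaling, weights frozen

print(W_new.shape)  # (64, 64)
```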
@inproceedings{9586096,author={Zhang, Fan and Angizi, Shaahin and Fan, Deliang},booktitle={2021 58th ACM/IEEE Design Automation Conference (DAC)},title={Max-PIM: Fast and Efficient Max/Min Searching in DRAM},year={2021},volume={},number={},pages={211-216},doi={10.1109/DAC18074.2021.9586096},}
@inproceedings{9586144,author={Zhang, Fan and Angizi, Shaahin and Fahmi, Naima Ahmed and Zhang, Wei and Fan, Deliang},booktitle={2021 58th ACM/IEEE Design Automation Conference (DAC)},title={PIM-Quantifier: A Processing-in-Memory Platform for mRNA Quantification},year={2021},volume={},number={},pages={43-48},doi={10.1109/DAC18074.2021.9586144},}
An LSTM and GAN Based ECG Abnormal Signal Generator
Han Sun, Fan Zhang, and Yunxiang Zhang
In Advances in Artificial Intelligence and Applied Cognitive Computing, 2021
The electrocardiogram (ECG), a recording of the electrical activity of the heart, is commonly used for cardiac analysis, but a lack of abnormal ECG signal data restricts the development of high-quality automatic auxiliary diagnosis. In this paper, we introduce an LSTM- and GAN-based ECG abnormal signal generator to alleviate this issue. By training on a small set of real abnormal signals, the proposed generator can learn to produce high-quality fake abnormal signals. The fake signals are then combined with real signals to train abnormal-ECG classifiers. We show that our method significantly improves classifiers’ ability to recognize uncommon cases that make up a low proportion of the database.
@inproceedings{10.1007/978-3-030-70296-0_54,author={Sun, Han and Zhang, Fan and Zhang, Yunxiang},editor={Arabnia, Hamid R. and Ferens, Ken and de la Fuente, David and Kozerenko, Elena B. and Olivas Varela, Jos{\'e} Angel and Tinetti, Fernando G.},title={An LSTM and GAN Based ECG Abnormal Signal Generator},booktitle={Advances in Artificial Intelligence and Applied Cognitive Computing},year={2021},publisher={Springer International Publishing},address={Cham},pages={743--755},isbn={978-3-030-70296-0},}
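A minimal sketch of the generator/discriminator pairing the abstract describes, in PyTorch, with assumed layer sizes and sequence length (illustrative only, not the authors' architecture):

```python
import torch
import torch.nn as nn

# Minimal sketch of an LSTM-based GAN for 1-D signals; layer sizes and
# sequence length are assumptions.

class Generator(nn.Module):
    def __init__(self, noise_dim=32, hidden=64):
        super().__init__()
        self.lstm = nn.LSTM(noise_dim, hidden, batch_first=True)
        self.out = nn.Linear(hidden, 1)          # one ECG sample per time step

    def forward(self, z):                         # z: (batch, seq_len, noise_dim)
        h, _ = self.lstm(z)
        return self.out(h).squeeze(-1)            # fake signal: (batch, seq_len)

class Discriminator(nn.Module):
    def __init__(self, hidden=64):
        super().__init__()
        self.lstm = nn.LSTM(1, hidden, batch_first=True)
        self.out = nn.Linear(hidden, 1)

    def forward(self, x):                         # x: (batch, seq_len)
        h, _ = self.lstm(x.unsqueeze(-1))
        return torch.sigmoid(self.out(h[:, -1]))  # real/fake score from last step

G, D = Generator(), Discriminator()
z = torch.randn(8, 256, 32)                       # 8 sequences of 256 noise vectors
fake = G(z)                                       # (8, 256) synthetic abnormal signals
print(fake.shape, D(fake).shape)                  # torch.Size([8, 256]) torch.Size([8, 1])
```

After adversarial training, the generated signals would be mixed with real ones to train the abnormal-beat classifier, as the abstract describes.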
2020
JETC
Mitigate Parasitic Resistance in Resistive Crossbar-Based Convolutional Neural Networks
Fan Zhang and Miao Hu
In ACM Journal on Emerging Technologies in Computing Systems (JETC), 2020
Traditional computing hardware often encounters an on-chip memory bottleneck in large-scale Convolutional Neural Network (CNN) applications. With its unique in-memory computing feature, resistive crossbar-based computing has attracted researchers’ attention as a promising solution to the memory bottleneck of von Neumann architectures. However, the parasitic resistances in a crossbar deviate its behavior from the ideal weighted-summation operation. In large-scale implementations, the impact of parasitic resistances must be carefully considered and mitigated to ensure circuit functionality. In this work, we implemented and simulated CNNs on resistive crossbar circuits with consideration of parasitic resistances. Moreover, we developed a new mapping scheme for high utilization of crossbar arrays in convolution, and a mitigation algorithm for parasitic resistances in CNN applications. The mitigation algorithm considers parasitic resistances as well as the data/kernel patterns of each layer to minimize the computing error in crossbar-based convolutions of CNNs. We demonstrate the proposed methods with implementations of a 4-layer CNN on MNIST and residual neural networks (ResNet-20, -32, and -56) on CIFAR-10. Simulation results show that the proposed methods effectively mitigate the parasitic resistances in crossbars. With our methods, modern CNNs on crossbars can preserve ideal (software-level) classification accuracy with 6-bit ADC and DAC implementations.
@article{10.1145/3371277,author={Zhang, Fan and Hu, Miao},title={Mitigate Parasitic Resistance in Resistive Crossbar-Based Convolutional Neural Networks},year={2020},issue_date={July 2020},publisher={Association for Computing Machinery},address={New York, NY, USA},volume={16},number={3},issn={1550-4832},url={https://doi.org/10.1145/3371277},doi={10.1145/3371277},journal={J. Emerg. Technol. Comput. Syst.},month=may,articleno={25},numpages={20},keywords={Resistive crossbar, convolutional neural network, parasitic resistance},}
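The deviation the abstract describes can be reproduced with a one-column nodal model: each cell injects current into a shared column wire with finite segment resistance, so the sensed current no longer equals the ideal dot product. A hedged sketch, with assumed resistance values:

```python
import numpy as np

# Single-column sketch of how wire parasitics skew a crossbar dot product
# (illustrative nodal analysis; resistance values are assumptions).

def column_current(V, g, r_wire=1.0):
    """Output current of one crossbar column with wire resistance r_wire
    (ohms) between adjacent row taps, sensed at virtual ground."""
    n = len(g)
    gw = 1.0 / r_wire
    A = np.zeros((n, n))
    b = g * V                                   # cell currents injected at each tap
    for i in range(n):
        A[i, i] = g[i]                          # cell conductance to the driven row
        for j in (i - 1, i + 1):                # wire segments to neighbouring taps
            if 0 <= j < n:
                A[i, i] += gw
                A[i, j] -= gw
    A[n - 1, n - 1] += gw                       # last tap ties to the sense amp (ground)
    v = np.linalg.solve(A, b)                   # node voltages along the column wire
    return gw * v[n - 1]                        # current delivered into the sense amp

rng = np.random.default_rng(2)
V = rng.random(64)                              # row drive voltages
g = rng.random(64) * 1e-4                       # cell conductances (siemens)
print(column_current(V, g, r_wire=2.0), V @ g)  # parasitic vs. ideal sum(g_i * V_i)
```

A mitigation scheme in the spirit of the paper would adjust the programmed conductances until the parasitic output matches the ideal one for the expected data patterns.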
@inproceedings{9045730,author={Zhang, Fan and Hu, Miao},booktitle={2020 25th Asia and South Pacific Design Automation Conference (ASP-DAC)},title={Defects Mitigation in Resistive Crossbars for Analog Vector Matrix Multiplication},year={2020},volume={},number={},pages={187-192},doi={10.1109/ASP-DAC47756.2020.9045730},}
Resistive crossbar arrays are known for their unique structure that implements analog in-memory vector-matrix multiplication (VMM). However, general-purpose circuit simulators, such as HSPICE and HSIM, are too slow for large-scale crossbar-array simulations that account for circuit parasitics. Although some simulators are designed specifically for crossbar arrays, they mainly focus on area/power/delay estimation rather than accurate SPICE-level simulation, and thus cannot model the array’s functionality for analog in-memory computing. In this paper, we first present a SPICE-level model of resistive crossbar arrays in MATLAB, with consideration of circuit parasitics. We also propose efficient methods to further speed up simulation through model simplifications. Finally, ResNet-20 on CIFAR-10 is applied to demonstrate the work. With the proposed model-simplification methods, simulation speed improves by 31X with tolerable errors, and more than 5X speedup is achieved on ResNet-20 with an accuracy drop of 6%.
@inproceedings{10.1145/3400302.3415627,author={Zhang, Fan and Hu, Miao},title={CCCS: Customized SPICE-Level Crossbar-Array Circuit Simulator for In-Memory Computing},year={2020},isbn={9781450380263},publisher={Association for Computing Machinery},address={New York, NY, USA},url={https://doi.org/10.1145/3400302.3415627},doi={10.1145/3400302.3415627},booktitle={Proceedings of the 39th International Conference on Computer-Aided Design},articleno={136},numpages={8},location={Virtual Event, USA},series={ICCAD '20},}
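One way to picture the model-simplification speedup is to lump groups of adjacent cells on a column into a single equivalent cell before the nodal solve, shrinking the linear system. This is an illustrative stand-in for the paper's actual simplifications, with assumed parameters:

```python
import numpy as np

# Sketch of a speed/accuracy trade-off via model simplification: lump every
# k adjacent cells of a column into one equivalent cell so the nodal system
# shrinks k-fold. Hedged illustration, not the simulator's actual method.

def solve_column(V, g, r=1.0):
    """Nodal solve of one column ladder: cells g driven by V, wire
    resistance r between taps, sensed at virtual ground."""
    n, gw = len(g), 1.0 / r
    A = np.diag(g + 2 * gw) - gw * (np.eye(n, k=1) + np.eye(n, k=-1))
    A[0, 0] -= gw                               # first tap has only one wire neighbour
    return gw * np.linalg.solve(A, g * V)[-1]   # current into the sense amp

def lump(V, g, k):
    """Merge k adjacent cells: conductances add; voltages combine
    conductance-weighted, preserving each group's injected current."""
    gg, VV = g.reshape(-1, k), V.reshape(-1, k)
    return (gg * VV).sum(1) / gg.sum(1), gg.sum(1)

rng = np.random.default_rng(3)
V, g = rng.random(256), rng.random(256) * 1e-4
V4, g4 = lump(V, g, k=4)                        # 256-node solve -> 64-node solve
# wire resistance per lumped node scales with the k merged segments
print(solve_column(V, g, r=1.0), solve_column(V4, g4, r=4.0))
```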
@misc{zhang2018memristorbased,title={Memristor-based Deep Convolution Neural Network: A Case Study},author={Zhang, Fan and Hu, Miao},year={2018},eprint={1810.02225},archiveprefix={arXiv},primaryclass={cs.NE},}