We investigate the latent feature space of a pre-trained StyleGAN and discover some excellent spatial transformation properties. Based on the observation, we propose a novel unified framework based on a pre-trained StyleGAN that enables a set of powerful functionalities, i.e., high-resolution video generation, disentangled control by driving video or audio, and flexible face editing.
@article{2203.04036,
author = {Yin, Fei and Zhang, Yong and Cun, Xiaodong and Cao, Mingdeng and Fan, Yanbo and Wang, Xuan and Bai, Qingyan and Wu, Baoyuan and Wang, Jue and Yang, Yujiu},
title = {StyleHEAT: One-Shot High-Resolution Editable Talking Face Generation via Pre-trained StyleGAN},
journal = {arxiv:2203.04036},
year = {2022}
}