# Takes an array of images and shows enough to fill up specified rows and columns
# This function is really useful for creating a contact sheet of generated images
# This way we can scroll through lots of images and pick the most compelling ones
def examine_images(images, rows, cols, start_idx, show_idx, suptitle=None, y=.93, scale=1):
    """Plot a rows x cols contact sheet of consecutive images.

    Args:
        images: indexable, sized collection of images.
        rows: number of grid rows.
        cols: number of grid columns.
        start_idx: index into images of the first image shown (top-left cell).
        show_idx: when truthy, each cell is titled with its index into images.
        suptitle: optional figure title; no title is drawn when None.
        y: vertical position passed to fig.suptitle.
        scale: multiplier applied to the figure size.

    Cells are filled row by row. If images runs out before the grid is full,
    the remaining cells are left blank instead of raising IndexError.
    """
    # squeeze=False keeps axs two-dimensional even when rows == 1 or cols == 1,
    # so axs[i, j] below is always a valid lookup.
    fig, axs = plt.subplots(
        rows, cols, squeeze=False,
        figsize=(int(12*scale), int(12 * rows / 5 * scale)),
    )
    n_images = len(images)
    idx = start_idx
    for i in range(rows):
        for j in range(cols):
            if idx >= n_images:
                # Nothing left to show: hide the empty axes frame
                axs[i, j].axis("off")
            elif show_idx:
                show_image(axs[i, j], images[idx], str(idx))
            else:
                show_image(axs[i, j], images[idx])
            idx += 1
    if suptitle is not None:
        fig.suptitle(suptitle, fontsize=30, y=y)
# Looking through the 2,000 images I produced earlier, these are the indices I liked
best70_idx = [19, 31, 76, 102, 95, 151, 198, 215, 246, 268, 395, 396, 490, 515, 537, 538, 612, 751, 772, 776, 934]
best100_idx = [74, 85, 102, 155, 169, 214, 427, 430, 672, 784, 792]
best170_idx = [113, 369, 579, 778, 803, 806, 804, 1083, 1082, 1511, 1535, 1595]
best200_idx = [125, 130, 177, 319, 392, 403, 400, 416, 586, 599, 608, 660, 1979, 1829, 1547, 1408]
# NOTE(review): all four selections below index best_gan70_imgs, although the index
# lists are named for 70 / 100 / 170 / 200 -- possibly a copy-paste slip. Confirm which
# image arrays were intended; filter_idx below was chosen against the current output.
best70_favs = best_gan70_imgs[best70_idx]
best100_favs = best_gan70_imgs[best100_idx]
best170_favs = best_gan70_imgs[best170_idx]
best200_favs = best_gan70_imgs[best200_idx]
# Stack the four selections into one array (21 + 11 + 12 + 16 = 60 entries)
best_gan_favs = np.vstack((best70_favs, best100_favs, best170_favs, best200_favs))
# Keep a hand-picked subset of 25 positions from the stacked array
filter_idx = [0,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,27,28, 38, 45, 57]
best_gan_favs = best_gan_favs[filter_idx]
# Show the 25 favourites as a 5 x 5 grid without index labels
examine_images(best_gan_favs, 5, 5, 0, show_idx=False, suptitle="Favorites from Best Gan", scale=1.2)
Comments:
We have some spectacular results, many of which look more appealing than the large majority of my dataset! The "best gan" has learned to produce a wide variety of landscapes with beautiful colors and detail. Insofar as these have an abstract look, the model has clearly not just copied the training set. We have created something new and inspiring. More on model originality to come.
# Hand-picked indices into weak_gan75_imgs (10 entries)
weak_gan_favs_idx = [127, 245, 301, 317, 320, 402, 751, 761, 1015, 1056]
weak_gan_favs = weak_gan75_imgs[weak_gan_favs_idx]
# Show them as a 2 x 5 grid without index labels
examine_images(weak_gan_favs, 2, 5, 0, False, suptitle="Favorites from Weak Gan", y=1, scale=1.3)
Comments:
The "weak gan" does not produce great landscapes at the same frequency as the "best gan", but we can still find some convincing results. Perhaps due to the simpler network architecture, these images are simpler than the ones from the "best gan". We see horizons and grass, but not too many other distinct features.
# Hand-picked indices into deep_gan35_imgs (30 entries)
deep_gan_fav_idx = [61, 201, 240, 308, 430, 465, 474, 566, 605, 706, 719, 747, 801, 905, 923, 1089, 1252, 1296, 1341, 1374, 1385, 1398, 1502, 1526, 1631, 1634, 1699, 1790, 1849, 1921]
deep_gan_favs = deep_gan35_imgs[deep_gan_fav_idx]
# Show them as a 6 x 5 grid without index labels
examine_images(deep_gan_favs, 6, 5, 0, False, suptitle="Favorites from Deep Gan", y=.93, scale=1.3)
Comments:
As noted earlier, the "deep gan" is more dreamy and smooth than the other two generators. Considering this was only trained for 35 epochs, it may have the most potential. Perhaps the larger 7 x 7 kernel size is responsible for the extra smoothness we observe. All in all, some gorgeous outputs with rich and harmonious colors that evoke a sense of peace.
Perhaps instead of taking the time to cherry pick good results ourselves, we could have the discriminator do the work for us. Let's see which generated images the discriminator thinks are real, and which it knows are fake.
# Load a saved generator / discriminator pair from models/2_128_r3
gan50 = load_model("models/2_128_r3/gen.25.h5")
dis50 = load_model("models/2_128_r3/dis.25.h5")
# NOTE(review): the images are generated from gan15, not the gan50 loaded above, and the
# calls further down use dis15 / gan15_imgs rather than dis50 / gan50_imgs -- confirm
# which generator / discriminator pair is intended.
gan50_imgs, gan50_noise = generate_images(gan15, 1000)
def get_preds(dis, images):
    """Return dis.predict applied to images after the affine shift x -> 2x - 1.

    Presumably images hold values in [0, 1] and the discriminator expects
    [-1, 1] -- confirm against the training pipeline.
    """
    return dis.predict(images * 2 - 1)
def show_best_worst(dis, images, cols, scale=1.5):
    """Plot the images with the highest and lowest discriminator predictions.

    Args:
        dis: discriminator passed to get_preds.
        images: indexable collection of images to score.
        cols: number of images shown per row.
        scale: multiplier applied to the figure size.

    The top row shows the cols highest-scoring images in descending order,
    the bottom row the cols lowest-scoring images in ascending order. Each
    cell is titled with its prediction. If cols exceeds the number of scored
    images, the surplus columns are left blank instead of raising IndexError.
    """
    preds = get_preds(dis, images).flatten()
    sorted_idxs = np.argsort(preds)  # ascending: lowest prediction first
    n_scored = len(sorted_idxs)
    # squeeze=False keeps ax two-dimensional even when cols == 1
    fig, ax = plt.subplots(2, cols, squeeze=False,
                           figsize=(int(cols*2*scale), int(5*scale)))
    for row in range(2):
        for col in range(cols):
            if col >= n_scored:
                # Fewer images than columns: hide the empty axes frame
                ax[row, col].axis("off")
                continue
            if row == 0:
                idx = sorted_idxs[-1 - col]  # walk down from the highest score
            else:
                idx = sorted_idxs[col]  # walk up from the lowest score
            pred = preds[idx]
            img = images[idx]
            show_image(ax[row, col], img, "Pred: {:.2f}".format(pred), fontsize=18)
# NOTE(review): dis15 / gan15_imgs are not assigned in this section of the file (the
# nearby cell assigns dis50 / gan50_imgs) -- confirm the intended pair.
show_best_worst(dis15, gan15_imgs, cols=5)
Comments:
Unfortunately the discriminator is not able to pick out the best generated images for us; however, the ones it thinks are real do tend to look better than the ones it thinks are fakes. In this case the fakes all have a fairly salient blemish.
"The Most Human Human" is a book by Brian Christian that reflects on state-of-the-art Turing test competitions. Humans converse with both computers and other humans by text and attempt to judge whether they are talking to a machine or not. The most human human is the human that is most frequently predicted by humans as human. Here I play on the idea with one of my generator / discriminator pairs by looking at the real images that look most real, as well as the real images that look most fake, as judged by the discriminator.
# Same ranking, applied to landscapes (the real images, per the surrounding notes)
show_best_worst(dis15, landscapes, cols=5)
Comments:
We note that the discriminator is less convinced by the images that look like the Google scrapes, probably because there were fewer of them in the training data.
One fun and popular thing to do in the latent space of GANs is image blending. The blend works by feeding a generator a linear path in latent space from one image to another.
# grab some more sample generated image indices for the following examples
idx_170 = [1736, 1855, 1111, 1210]
idx_100 = [1326, 1345, 1802, 1499, 1658, 1687, 1817, 1704, 1703, 1752, 1759, 1768, 1778, 1775, 1793, 1927, 1964, 1998, 1902]
# 4 images from best_gan170_imgs and 19 from best_gan100_imgs
example_imgs1 = best_gan170_imgs[idx_170]
example_imgs2 = best_gan100_imgs[idx_100]
# Presumably returns 1000 generated images plus the noise vectors that produced them
# (verify against generate_images); the noise is reused by the latent-space demos below
gan170_imgs2, gan170_noise2 = generate_images(best_gan170, 1000)
# Takes a generator and a noise vector and generates an image
def noise_to_image(gen, nos, plot=True, ax=None, title=None):
    """Generate one image from a noise vector and either plot or return it.

    Args:
        gen: generator exposing predict(batch).
        nos: noise array; it is reshaped to a batch of one row before predict.
        plot: when True the image is drawn and None is returned; when False
            nothing is drawn and the image array is returned.
        ax: axes to draw on. When None, the image is drawn with plt.imshow
            on the current axes.
        title: optional title for the plot.

    Returns:
        The generated image (first element of the prediction, mapped through
        0.5 * x + 0.5) when plot is False, otherwise None.
    """
    gen_img = gen.predict(nos.reshape(1, -1))[0]
    # Presumably the generator output lies in [-1, 1]; this maps it to [0, 1] for imshow
    gen_img = 0.5 * gen_img + 0.5
    if not plot:
        return gen_img
    if ax is None:
        plt.imshow(gen_img)
        # Previously a title was never applied when no axes were supplied
        if title is not None:
            plt.title(title)
    else:
        ax.imshow(gen_img)
        ax.axis("off")
        if title is not None:
            ax.set_title(title)
    return None
# Takes a smooth walk from one image latent space to another, generating the intervening images along the way
def blend_images(idx1, idx2, gan, noise, steps=5):
    """Plot a linear interpolation in latent space between two noise vectors.

    Args:
        idx1: index into noise of the starting vector.
        idx2: index into noise of the ending vector.
        gan: generator passed to noise_to_image.
        noise: indexable collection of noise vectors.
        steps: number of images in the blend, endpoints included (default 5,
            the value that used to be hard-coded).

    Raises:
        ValueError: if steps is less than 2, since a blend needs both endpoints.
    """
    if steps < 2:
        raise ValueError("steps must be at least 2")
    noise1 = noise[idx1]
    noise2 = noise[idx2]
    diff = noise2 - noise1
    # 3 inches of width per panel reproduces the original (15, 3) figure for 5 steps
    fig, ax = plt.subplots(1, steps, figsize=(3 * steps, 3))
    for i in range(steps):
        nos = noise1 + diff * i / (steps - 1)  # creates the intervening noise vectors
        noise_to_image(gan, nos, ax=ax[i])
    fig.suptitle("Image Blend", fontsize=25)
# Blend between noise vectors 792 and 669 of gan170_noise2 using best_gan170
blend_images(792, 669, best_gan170, gan170_noise2)
Now we inspect how close our generated images are to training images in a way that's more mathematically rigorous than just eye-balling results. Both metrics used here are imperfect and there still might be images in the training set that a human would label as closer in feel than the ones presented here. Still, this is quite interesting and helps to build the case for model originality.
def nearest_L2(generated, reals):
    """Return the element of reals with the smallest L2 distance to generated.

    Args:
        generated: array; flattened before comparison.
        reals: iterable of arrays, each flattened before comparison.

    Returns:
        The closest element of reals (the first one on ties), or None when
        reals is empty.
    """
    target = generated.flatten()  # loop-invariant, so flatten once
    # np.inf instead of np.infty: the latter alias was removed in NumPy 2.0
    min_dist = np.inf
    nearest = None
    for real_img in reals:
        dist = np.linalg.norm(target - real_img.flatten(), 2)
        if dist < min_dist:
            min_dist = dist
            nearest = real_img
    return nearest
def cos(v1, v2):
    """Return the cosine similarity of v1 and v2: their dot product over the product of their norms."""
    dot_product = np.dot(v1, v2)
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    return dot_product / norm_product
def nearest_cosine(generated, reals):
    """Return the element of reals with the highest cosine similarity to generated.

    Args:
        generated: array; flattened before comparison.
        reals: iterable of arrays, each flattened before comparison.

    Returns:
        The most similar element of reals (the first one on ties), or None
        when reals is empty or no similarity is comparable (e.g. all NaN).
    """
    v1 = generated.flatten()  # loop-invariant, so flatten once
    # Start below the smallest possible cosine (-1) so that a candidate is
    # still returned when every similarity is zero or negative.
    max_similarity = -np.inf
    nearest = None
    for real_img in reals:
        cosine = cos(v1, real_img.flatten())
        if cosine > max_similarity:
            max_similarity = cosine
            nearest = real_img
    return nearest
def plot_neighbors(images, distance_func, metric_name):
    """Plot up to 7 images above their nearest neighbour in landscapes.

    Args:
        images: indexable, sized collection of images; the first 7 are used.
        distance_func: callable (generated, reals) -> nearest element of reals.
        metric_name: label used in the figure title.

    The top row shows each image, the bottom row the element of the
    module-level landscapes that distance_func selects for it. If fewer than
    7 images are supplied, the surplus columns are left blank instead of
    raising IndexError.
    """
    n_cols = 7
    fig, ax = plt.subplots(2, n_cols, figsize=(22, 5))
    n_images = len(images)
    for col in range(n_cols):
        if col >= n_images:
            # No image for this column: hide both empty axes frames
            ax[0, col].axis("off")
            ax[1, col].axis("off")
            continue
        generated = images[col]
        nearest = distance_func(generated, landscapes)
        show_image(ax[0, col], generated, "Generated", fontsize=20)
        show_image(ax[1, col], nearest, "Nearest Real", fontsize=20)
    fig.tight_layout()
    fig.suptitle(metric_name + " Neighbors", y=1.07, fontsize=25)
# L2 neighbours for the first 7 images of example_imgs2
plot_neighbors(example_imgs2, nearest_L2, "L2")
# Cosine neighbours for the 7 images starting at position 9 of example_imgs2
plot_neighbors(example_imgs2[9:], nearest_cosine, "Cosine Similarity")
# Scale a single noise vector by a growing multiplier and plot what the generator
# produces for each scaled copy, one panel per multiplier.
noise_example = gan170_noise2[419]
fig, ax = plt.subplots(1, 5, figsize=(16, 4))
mult = .5  # multiplier for the first panel
for col in range(5):
    # simply multiply the noise by a constant and compare outputs
    noise = noise_example * mult
    noise_to_image(
        best_gan170,
        noise,
        ax=ax[col],
        title="Multiple = {:.2f}".format(mult),
    )
    mult = mult + .2  # step the multiplier for the next panel
fig.suptitle("Vector Arithmetic", fontsize=25, y=1.05);
Comments:
We can see that points further from the origin in latent space tend to produce brighter, more saturated generated images.
# Two noise vectors from gan170_noise2 used as starting points for the perturbation demo
noise_example2 = gan170_noise2[701]
noise_example3 = gan170_noise2[433]
def add_small_noise(demo_noise):
    """Plot 5 generator outputs for independent small perturbations of demo_noise.

    Args:
        demo_noise: noise array used as the centre of the perturbations.

    Each panel adds fresh Gaussian noise (mean 0, standard deviation .4) to
    the same demo_noise; the perturbations do not accumulate from panel to
    panel. Images are generated with the module-level best_gan170.
    """
    fig, ax = plt.subplots(1, 5, figsize=(15, 4))
    for col in range(5):
        # Match the perturbation to demo_noise's own shape instead of
        # hard-coding a latent size of 100
        jitter = np.random.normal(0, .4, np.shape(demo_noise))
        noise = demo_noise + jitter
        noise_to_image(best_gan170, noise, ax=ax[col])
    fig.suptitle("Random Tip Toe", fontsize=23, y=.93)
# Run the perturbation demo on both example noise vectors
add_small_noise(noise_example2)
add_small_noise(noise_example3)
# Stack the two example sets and show the first 20 (4 x 5), each labelled with its position in the stack
nice_images = np.vstack((example_imgs1, example_imgs2))
examine_images(nice_images, 4, 5, 0, show_idx=True, suptitle="More Nice Generated Images", scale=1.3)
This project has highlighted the potential for GANs to create attractive, original paintings. Success hinged on collecting a large and varied dataset of landscape paintings, using GAN code already working on MNIST as a base, and leveraging the compute power of Google Cloud to experiment with hyperparameter tuning and architecture. The work of Alec Radford et al. to find tips for DCGAN convergence was also critical, as was a handful of blogs showing that GANs work well on the Wiki Art dataset.
For future work, it would be great to increase the resolution. As far as I recall, I have not yet seen any examples of GANs creating landscapes at resolutions larger than 128 x 128. One idea is to use a super resolution network on the images that I already have. Also, Nvidia has had promising success generating high resolution fake faces by gradually increasing resolution while training.
I'm glad to have produced something I can hang on my wall, even though it's just a contact sheet of thumbnails. In the future I hope to see user-friendly, interactive GANs utilized by artists to enhance creativity.