图像增强 cnn



CycleGan增强 2个项目

该文是香港理工大学张磊老师及其学生在图像增强领域的又一颠覆性成果。它将深度学习技术与传统3DLUT图像增强技术结合,得到了一种更灵活、更高效的图像增强技术。所提方法能够以1.66ms的速度对4K分辨率图像进行增强(硬件平台:Titan RTX GPU)。

paper: https://www4.comp.polyu.edu.hk/~cslzhang/paper/PAMI_LUT.pdf

code: https://github.com/HuiZeng/Image-Adaptive-3DLUT



from setuptools import setup
import torch
from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CppExtension

# Build the `trilinear` extension: the CUDA implementation when a GPU is
# visible to PyTorch, otherwise the pure C++ implementation.
if torch.cuda.is_available():
    print('Including CUDA code.')
    # NOTE(review): the CUDA source file names below were reconstructed after
    # the original lines were lost in extraction - confirm against the
    # project's src/ directory.
    setup(
        name='trilinear',
        ext_modules=[
            CUDAExtension('trilinear', [
                'src/trilinear_cuda.cpp',
                'src/trilinear_kernel.cu',
            ])
        ],
        cmdclass={
            'build_ext': BuildExtension
        })
else:
    print('NO CUDA is found. Fall back to CPU.')
    setup(
        name='trilinear',
        ext_modules=[CppExtension('trilinear', ['src/trilinear.cpp'])],
        cmdclass={'build_ext': BuildExtension})

set MSSdk=1

编译成功,但调用时提示找不到 dll,解决方法:






import glob

import tensorflow as tf
from tensorflow.keras.layers import Conv2D, BatchNormalization, SpatialDropout2D, ReLU, Input, Concatenate, Add
from tensorflow.keras.losses import MeanAbsoluteError, MeanSquaredError
from tensorflow.keras.optimizers import Adam
import os
import pandas as pd
import cv2

class UWCNN(tf.keras.Model):
    """Densely connected 10-layer convolutional network with a residual output.

    Three groups of three 3x3 convolutions (16 filters each, followed by ReLU)
    are chained; after every group the group's feature maps are concatenated
    along axis 3 with everything that fed the group. A final 3-filter
    convolution is added element-wise to the network input.
    """

    def __init__(self):
        super().__init__()

        def conv3x3(layer_name, filters=16):
            # Every convolution shares kernel size, stride and padding; only
            # the filter count and the layer name differ.
            return Conv2D(filters=filters, kernel_size=3, strides=(1, 1),
                          padding='same', name=layer_name)

        # --- group 1 ---
        self.conv1 = conv3x3("conv2d_dehaze1")
        self.relu1 = ReLU()
        self.conv2 = conv3x3("conv2d_dehaze2")
        self.relu2 = ReLU()
        self.conv3 = conv3x3("conv2d_dehaze3")
        self.relu3 = ReLU()
        self.concat1 = Concatenate(axis=3)

        # --- group 2 ---
        self.conv4 = conv3x3("conv2d_dehaze4")
        self.relu4 = ReLU()
        self.conv5 = conv3x3("conv2d_dehaze5")
        self.relu5 = ReLU()
        self.conv6 = conv3x3("conv2d_dehaze6")
        self.relu6 = ReLU()
        self.concat2 = Concatenate(axis=3)

        # --- group 3 ---
        self.conv7 = conv3x3("conv2d_dehaze7")
        self.relu7 = ReLU()
        self.conv8 = conv3x3("conv2d_dehaze8")
        self.relu8 = ReLU()
        self.conv9 = conv3x3("conv2d_dehaze9")
        self.relu9 = ReLU()
        self.concat3 = Concatenate(axis=3)

        # --- residual head: 3 output channels, no activation ---
        self.conv10 = conv3x3("conv2d_dehaze10", filters=3)
        self.add1 = Add()

    def call(self, inputs):
        """Run the forward pass and return ``inputs`` plus the predicted residual."""
        # Group 1: the concatenation also carries the raw input forward.
        feat1 = self.conv1(inputs)
        feat1 = self.relu1(feat1)
        feat2 = self.conv2(feat1)
        feat2 = self.relu2(feat2)
        feat3 = self.conv3(feat2)
        feat3 = self.relu3(feat3)
        stack1 = self.concat1([feat1, feat2, feat3, inputs])

        # Group 2: consumes and then extends the first concatenation.
        feat4 = self.conv4(stack1)
        feat4 = self.relu4(feat4)
        feat5 = self.conv5(feat4)
        feat5 = self.relu5(feat5)
        feat6 = self.conv6(feat5)
        feat6 = self.relu6(feat6)
        stack2 = self.concat2([stack1, feat4, feat5, feat6])

        # Group 3: same pattern on top of the second concatenation.
        feat7 = self.conv7(stack2)
        feat7 = self.relu7(feat7)
        feat8 = self.conv8(feat7)
        feat8 = self.relu8(feat8)
        feat9 = self.conv9(feat8)
        feat9 = self.relu9(feat9)
        stack3 = self.concat3([stack2, feat7, feat8, feat9])

        residual = self.conv10(stack3)
        return self.add1([inputs, residual])

def parse_function(filename, label):
    """Load an (input, target) pair of BMP files as float32 image tensors.

    :param filename: path of the input image file
    :param label: path of the target image file
    :return: tuple ``(input_image, target_image)``; each is decoded as a
        3-channel BMP and passed through ``tf.image.convert_image_dtype``
        with ``tf.float32``
    """
    def _load_bmp_as_float(path):
        # read raw bytes -> decode 3-channel BMP -> convert to float32
        raw_bytes = tf.io.read_file(path)
        decoded = tf.image.decode_bmp(raw_bytes, channels=3)
        return tf.image.convert_image_dtype(decoded, tf.float32)

    return _load_bmp_as_float(filename), _load_bmp_as_float(label)

def combloss(y_actual, y_predicted):
    """
    Custom loss function for the Keras model: ``4 * MSE + (1 - mean SSIM)``.

    :param y_actual: batch of ground-truth images
    :param y_predicted: batch of predicted images
    :return: scalar loss tensor
    """
    # this is just l2 + lssim
    # SSIM is evaluated with max_val=1, i.e. pixel values are presumably in
    # [0, 1] - verify against the input pipeline.
    ssim_per_image = tf.image.ssim(y_actual, y_predicted, max_val=1, filter_size=13)
    lssim = tf.constant(1, dtype=tf.float32) - tf.reduce_mean(ssim_per_image)
    lmse = MeanSquaredError(reduction=tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE)(y_actual, y_predicted)
    # The MSE term is weighted four times as heavily as the SSIM term.
    lmse = tf.math.multiply(lmse, 4)
    return tf.math.add(lmse, lssim)

def train(datafile="data.csv", ckptpath="./train_type1/cp.ckpt", type='type1'):
    """Train a fresh UWCNN on the image pairs listed in a CSV file.

    :param datafile: CSV file with an ``AUGFILE`` column (input image paths)
        and a ``GTFILE`` column (target image paths)
    :param ckptpath: file path handed to the ``ModelCheckpoint`` callback,
        which saves weights only
    :param type: accepted for signature compatibility; not used in the body
    """
    table = pd.read_csv(datafile)
    input_paths = tf.constant(list(table["AUGFILE"]))
    target_paths = tf.constant(list(table["GTFILE"]))

    # Shuffle over the whole list of path pairs, then decode and batch.
    pairs = tf.data.Dataset.from_tensor_slices((input_paths, target_paths))
    pairs = pairs.shuffle(len(input_paths))
    batches = pairs.map(parse_function)
    batches = batches.batch(10)

    # Callback that writes the model's weights to `ckptpath`.
    save_weights = tf.keras.callbacks.ModelCheckpoint(
        filepath=ckptpath,
        save_weights_only=True,
        verbose=1,
    )

    network = UWCNN()
    network.compile(optimizer=Adam(), loss=combloss)
    network.fit(batches, epochs=40, callbacks=[save_weights])


def model_test(imgfile="12433.png", ckdir="./train_type1/cp.ckpt", outdir="./results/", type='type1'):
    """Enhance a single PNG image with trained UWCNN weights and save the result.

    :param imgfile: path of the PNG image to enhance
    :param ckdir: checkpoint path from which the model weights are restored
    :param outdir: directory that receives the output image (created if missing)
    :param type: prefix for the output file name, ``<type>_<basename of imgfile>``
    """
    model = UWCNN()
    model.compile(optimizer=Adam(), loss=combloss)
    # Restore the trained weights. Previously `ckdir` was ignored and the
    # prediction came from a randomly initialised network.
    model.load_weights(ckdir)

    image = tf.image.decode_png(tf.io.read_file(imgfile), channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize(image, (460, 620))
    batch = tf.expand_dims(image, axis=0)  # add the batch dimension

    # Drop the batch dimension again and scale to the 0-255 range.
    enhanced = model.predict(batch)[0] * 255
    # TensorFlow decodes images as RGB whereas cv2.imwrite expects BGR;
    # without this swap the red and blue channels are exchanged on disk.
    enhanced = cv2.cvtColor(enhanced, cv2.COLOR_RGB2BGR)

    if outdir:
        # cv2.imwrite fails silently when the target directory is missing.
        os.makedirs(outdir, exist_ok=True)
    cv2.imwrite(os.path.join(outdir, type + "_" + os.path.basename(imgfile)), enhanced)

def eval_dir(img_dir="./test_images/", out_dir="./results/", ckdir="./train_type1/cp.ckpt"):
    """Enhance every PNG file in a directory with trained UWCNN weights.

    The original body referenced ``files`` and ``out_dir`` without defining
    them; they are now derived from defaulted parameters so ``eval_dir()``
    can still be called without arguments.

    NOTE(review): the default paths mirror the ones used in this file's
    ``__main__`` block - confirm they match the intended directories.

    :param img_dir: directory searched (non-recursively) for ``*.png`` files
    :param out_dir: directory that receives the results, saved under the
        same base name as the input (created if missing)
    :param ckdir: checkpoint path from which the model weights are restored
    """
    files = sorted(glob.glob(os.path.join(img_dir, "*.png")))

    model = UWCNN()
    model.compile(optimizer=Adam(), loss=combloss)
    # Without restoring weights the network would run with its random
    # initialisation.
    model.load_weights(ckdir)

    if out_dir:
        # cv2.imwrite fails silently when the target directory is missing.
        os.makedirs(out_dir, exist_ok=True)

    for imgfile in files:
        image = tf.image.decode_png(tf.io.read_file(imgfile), channels=3)
        image = tf.image.convert_image_dtype(image, tf.float32)
        image = tf.image.resize(image, (256, 256))
        batch = tf.expand_dims(image, axis=0)  # add the batch dimension

        # Drop the batch dimension again and scale to the 0-255 range.
        enhanced = model.predict(batch)[0] * 255
        # TensorFlow decodes images as RGB whereas cv2.imwrite expects BGR.
        enhanced = cv2.cvtColor(enhanced, cv2.COLOR_RGB2BGR)
        cv2.imwrite(os.path.join(out_dir, os.path.basename(imgfile)), enhanced)

# Script entry point. Every call below is commented out, so running the file
# only sets the two module-level names `type` and `ckdir`.
if __name__ == "__main__":
    # Training example:
    # train(datafile="data_type1.csv", ckptpath="./train_type1/cp.ckpt", type='type1')

    type = "type1"
    ckdir = "./train_type1/cp.ckpt"
    # Inference examples.
    # NOTE(review): model_test() has no `imgdir` parameter, so the last four
    # calls would raise TypeError if uncommented as written.
    # model_test(imgfile="./test_images/532_img_.png", ckdir=ckdir, outdir="./results/", type=type)
    # model_test(imgdir="./test_images/", imgfile="602_img_.png", ckdir=ckdir, outdir="./results/", type=type)
    # model_test(imgdir="./test_images/", imgfile="617_img_.png", ckdir=ckdir, outdir="./results/", type=type)
    # model_test(imgdir="./test_images/", imgfile="12422.png", ckdir=ckdir, outdir="./results/", type=type)
    # model_test(imgdir="./test_images/", imgfile="12433.png", ckdir=ckdir, outdir="./results/", type=type)


# This code normalizes the output images in HSI space
# Code Inspired By: Li C, Anwar S, Porikli F. Underwater scene prior inspired deep learning image and video enhancement[J]. Pattern Recognition, 2020, 98: 107038
# Implementation By: Max Midwinter
#HSI and RGB conversion code by: DaiPuWei

import os
import cv2
import numpy as np
import math

def RGB2HSI(rgb_img):
    """
    Convert a colour image to an HSI image.

    Despite the name, the channels are read in B, G, R order (index 0 is
    blue, index 2 is red).

    :param rgb_img: array of shape (rows, cols, 3), channel order B, G, R,
        values on a 0-255 scale
    :return: array with the same shape and dtype as the input; channel 0 is
        H, channel 1 is S and channel 2 is I, each scaled to 0-255 (H is the
        hue angle divided by 2*pi). The input is not modified.
    """
    rgb_img = np.asarray(rgb_img)
    # The result keeps the input's dtype, so for integer inputs the float
    # channel values are cast on assignment below.
    hsi_img = rgb_img.copy()

    # Split the channels and normalize them to [0, 1].
    B, G, R = (rgb_img[:, :, k] / 255.0 for k in range(3))
    total = R + G + B
    I = total / 3.0

    # Hue: angle between the pixel's colour vector and the red axis.
    # Rounding can push the radicand slightly below 0 or the cosine slightly
    # outside [-1, 1]; clamp both so no NaN is produced.
    den = np.sqrt(np.maximum((R - G) ** 2 + (R - B) * (G - B), 0.0))
    has_hue = den > 0
    cos_theta = np.divide(0.5 * ((R - B) + (R - G)), den,
                          out=np.zeros_like(den), where=has_hue)
    theta = np.arccos(np.clip(cos_theta, -1.0, 1.0))
    # B <= G: hue is theta; otherwise it lies in the lower half-plane.
    hue = np.where(B <= G, theta, 2 * np.pi - theta)
    # Grey pixels (den == 0) have no defined hue; use 0.
    H = np.where(has_hue, hue, 0.0) / (2 * np.pi)

    # Saturation: 1 - 3*min(R, G, B) / (R + G + B), and 0 for black pixels.
    lowest = np.minimum(np.minimum(R, G), B)
    S = 1 - np.divide(3 * lowest, total, out=np.ones_like(total), where=total > 0)

    # Scale every channel to 0-255 for storage/display.
    hsi_img[:, :, 0] = H * 255
    hsi_img[:, :, 1] = S * 255
    hsi_img[:, :, 2] = I * 255
    return hsi_img

def HSI2RGB(hsi_img):
    """
    Convert an HSI image back to a colour image.

    :param hsi_img: array of shape (rows, cols, 3) holding H, S and I in
        channels 0, 1 and 2, each on a 0-255 scale (H is the hue angle
        divided by 2*pi)
    :return: array with the same shape and dtype as the input, channels in
        B, G, R order, scaled to 0-255. The input is not modified.
    """
    hsi_img = np.asarray(hsi_img)
    rgb_img = hsi_img.copy()

    # Split the channels and normalize them to [0, 1].
    H, S, I = (hsi_img[:, :, k] / 255.0 for k in range(3))
    # Wrap the hue into [0, 2*pi) so that H == 255 (a full turn) is treated
    # like hue 0 instead of matching no sector at all.
    h = np.mod(H * 2 * np.pi, 2 * np.pi)

    # Separate output buffers: the channels must not alias H, S and I.
    R = np.zeros_like(I)
    G = np.zeros_like(I)
    B = np.zeros_like(I)

    # Sector 1: 0 <= h < 120 degrees.
    a = h < 2 * np.pi / 3
    hs, ss, ii = h[a], S[a], I[a]
    b = ii * (1 - ss)
    r = ii * (1 + ss * np.cos(hs) / np.cos(np.pi / 3 - hs))
    g = 3 * ii - r - b
    B[a], R[a], G[a] = b, r, g

    # Sector 2: 120 <= h < 240 degrees.
    a = (h >= 2 * np.pi / 3) & (h < 4 * np.pi / 3)
    hs, ss, ii = h[a], S[a], I[a]
    r = ii * (1 - ss)
    g = ii * (1 + ss * np.cos(hs - 2 * np.pi / 3) / np.cos(np.pi - hs))
    b = 3 * ii - r - g
    R[a], G[a], B[a] = r, g, b

    # Sector 3: 240 <= h < 360 degrees (no upper bound needed after wrapping).
    a = h >= 4 * np.pi / 3
    hs, ss, ii = h[a], S[a], I[a]
    g = ii * (1 - ss)
    b = ii * (1 + ss * np.cos(hs - 4 * np.pi / 3) / np.cos(5 * np.pi / 3 - hs))
    r = 3 * ii - g - b
    B[a], G[a], R[a] = b, g, r

    rgb_img[:, :, 0] = B * 255
    rgb_img[:, :, 1] = G * 255
    rgb_img[:, :, 2] = R * 255
    return rgb_img

def transform(dir=None):
    """
    Stretch the S and I channels of an image file to the full range, in place.

    The image is converted to HSI, S and I are min-max normalized to [0, 1]
    (H is left untouched), the result is converted back and written over the
    original file.

    :param dir: path of the image file to read and overwrite
    :raises ValueError: if no path is given or the file cannot be read as an image
    """
    # cv2.imread returns None (instead of raising) for unreadable files;
    # fail here with a clear message rather than deep inside RGB2HSI.
    img = cv2.imread(dir) if dir is not None else None
    if img is None:
        raise ValueError("Could not read image: %r" % (dir,))

    hsi_img = RGB2HSI(img) / 255
    h, s, i = cv2.split(hsi_img)
    s = cv2.normalize(s, dst=None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
    i = cv2.normalize(i, dst=None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX)
    # HSI2RGB expects its channels on a 0-255 scale.
    norm_hsi_img = cv2.merge((h, s, i)) * 255
    rgb_img = HSI2RGB(norm_hsi_img)
    cv2.imwrite(dir, rgb_img)

def allImgInDir(path='./results'):
    """Apply ``transform`` to every file found under ``path``, recursively.

    Each file name is printed before it is processed, and the list of all
    visited file paths is printed at the end.

    :param path: root directory to walk
    """
    visited = []
    for folder, _subdirs, names in os.walk(path):
        for name in names:
            full_path = os.path.join(folder, name)
            visited.append(full_path)
            print("File: " + str(name))
            transform(full_path)
    print("fname  %s" % visited)

if __name__ == "__main__":
    # NOTE(review): the body of this guard was lost; running the directory
    # normalization with its default path is the presumed intent - confirm.
    allImgInDir()

CycleGan增强 2个项目

GitHub - ioannispol/UnderWaterGAN: CycleGAN model to generate images with underwater features


GitHub - darkmatter18/Underwater-image-enhancement: A Deep Learning CycleGAN Based application, that can enhance the underwater images.


GitHub - aitorzip/PyTorch-CycleGAN: A clean and readable Pytorch implementation of CycleGAN


