Commit 42ef1db5 authored by Senen Cerda, A.

Initial commit

#!/usr/bin/env python3
#SBATCH --partition=mcs.gpu.q
#SBATCH --output=openme_linear.out
#CLUSTER SIMULATION CODE
import numpy as np #import numpy package
import time
from sklearn.decomposition import PCA #import PCA for whitening
from numpy import linalg as la
from scipy.optimize import curve_fit
from numpy import asarray
from numpy import save
#import required keras modules
from keras.models import Sequential
from keras.layers import Dense, Dropout
#import tensorflow and pandas
#from __future__ import absolute_import, division, print_function, unicode_literals
import pandas as pd
import tensorflow as tf
#import tensorflow.transform as tft
#Loading Data
DATA_PATH= "superconduct/train.csv"
Data_Raw = pd.read_csv(DATA_PATH)
X_Raw = Data_Raw.drop(columns=["critical_temp"]).astype('float64')
Y_Raw = Data_Raw["critical_temp"].astype('float64')
#PCA Data Whitening
pca = PCA(whiten=True)
whitened = pca.fit_transform(X_Raw)
Data_Matrix = np.dot(Y_Raw.to_numpy(), whitened)
Data_Matrix = Data_Matrix/la.norm(Data_Matrix) # Normalize Y so that it has unit norm.
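# Data_Matrix is the row vector Y^T X_whitened (one entry per whitened feature),
# rescaled to unit Euclidean norm; it plays the role of the target Y in the loss functions below.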
print('START SIMULATION DROPOUT WITH LINEAR ACTIVATION WITH GAUSSIAN INITIALIZATION')
#Structure constants
#p = 0.5
dim_output = 1
dim_hidden = 20
dim_input = Data_Matrix.shape[0]
#initialization constants
#sigma=0.1
##Intermediate variables
matrix2_dim = (dim_output, dim_hidden)
matrix1_dim = (dim_hidden, dim_input)
#Expected dropout loss for a linear NN; lambda is taken as the only parameter depending on p
def Det_Droput_Linear_NN_Loss( Y, A, B, p):
C = A @ B
BB = B @ B.transpose()
AA = A.transpose() @ A
return (la.norm( Y - p*C))**2 + p*(1-p)*np.dot( np.diagonal(AA), \
np.diagonal(BB))
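# The expression above is the closed-form expectation of the dropout loss when the f hidden
# units are dropped independently, each kept with probability p:
#   E_m || Y - A diag(m) B ||^2 = || Y - p*A@B ||^2 + p*(1-p) * sum_i (A^T A)_ii * (B B^T)_ii,
# since (A^T A)_ii = ||i-th column of A||^2 and (B B^T)_ii = ||i-th row of B||^2.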
def Batch_Stoch_Dropout_Linear_NN_Loss( Y, A, B, p, size):
    fun = 0
    for i in range(size):
        #Bernoulli(p) dropout masks, one per weight entry
        F_A = np.random.binomial(1, p, size= matrix2_dim).astype('float64')
        F_B = np.random.binomial(1, p, size= matrix1_dim).astype('float64')
        fun += la.norm( Y - (A * F_A) @ (B * F_B))**2
    return fun/size
def Det_Gradient_Dropout_Linear_NN_Loss(Y, A, B, p):
C = A @ B
BB = B @ B.transpose()
AA = A.transpose() @ A
Bt=B.transpose()
At=A.transpose()
return -2*(p)*(( Y - p*C) @ Bt) + 2*p*(1-p)*(A @ np.diag(np.diagonal(BB))), \
-2*(p)*(At@( Y - p*C)) + 2*p*(1-p)*( np.diag(np.diagonal(AA)) @ B)
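# Gradients of the expected dropout loss above:
#   dL/dA = -2p*( Y - p*A@B ) @ B^T + 2p(1-p) * A @ diag(B@B^T)
#   dL/dB = -2p* A^T @ ( Y - p*A@B ) + 2p(1-p) * diag(A^T@A) @ B
# where diag(.) denotes the diagonal matrix built from the diagonal entries.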
def Batch_Stoch_Gradient_Dropout_Linear_NN_Loss( Y, A, B, p, size):
    Delta_2 = np.zeros(matrix2_dim)
    Delta_1 = np.zeros(matrix1_dim)
    for i in range(size):
        F_A = np.random.binomial(1, p, size= matrix2_dim).astype('float64')
        F_B = np.random.binomial(1, p, size= matrix1_dim).astype('float64')
        Af = A * F_A
        Bf = B * F_B
        Cf = Af @ Bf
        Delta_2 += -2*F_A*(( Y - Cf )@ Bf.transpose())
        Delta_1 += -2*(Af.transpose()@( Y - Cf))*F_B
    return Delta_2/size, Delta_1/size #average of the sampled gradients
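# Note: the two Batch_Stoch_* functions sample Bernoulli(p) masks entry-wise on the weight
# matrices and are kept for reference; the experiments below only call the deterministic
# (expected) gradient.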
# Convergence rate estimator
def Tailfun(x,a,b,c):
return a*np.exp(-(x*b)) + c
def Approximate_linear(x, a,b,c):
return a*(1-b*x) + c
def Rate_Fit(L, alpha): #To approximate the gradient flow, the step size should be one close to convergence
total = L.shape[0]
xdata = np.array([x * 0.1 for x in range(total//5, total)])
ydata = L[total//5:total]
opt, cov = curve_fit(Tailfun, xdata, ydata)
return opt[1]/alpha
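# Hypothetical usage (Rate_Fit is not called in this script): given a recorded loss trace and
# the learning rate, e.g.
#   beta = Rate_Fit(Loss_GD, rate)
# it fits a*exp(-b*x) + c to the last 80% of the trace and returns the decay constant b
# rescaled by the step size alpha.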
print('START SIMULATION DROPOUT WITH EPSILON-CLOSE INITIALIZATION WITH LINEAR ACTIVATION')
#Global variables defining the solution: since Data_Matrix has unit norm, its SVD is trivial (U = 1, eta = 1, V = Data_Matrix)
#U, eta, V = np.linalg.svd(Data_Matrix)
U= np.array([1.0])
eta = 1.0
V = np.array(Data_Matrix)
#p = 0.7
f = 20
#print(eta)
#print(V)
#eta_star = eta*(p*f/(p*f + 1-p)) #Maybe we need to multiply by p, since we have scaled the function.
#theta = np.sqrt(eta_star)
#CHECK OUT VARIABLES
#print(V)
#Structure constants
#p = 0.5
dim_output = 1
dim_hidden = 20
dim_input = Data_Matrix.shape[0]
#initialization constants
#sigma=0.1
##Intermediate variables
matrix2_dim = (dim_output, dim_hidden)
matrix1_dim = (dim_hidden, dim_input)
#Find Random minimum and add noise
def random_minima(a,b):
eta_star = eta*(a*b/(a*b + 1-a))/a
theta = np.sqrt(eta_star)
S = (2*np.random.binomial(1, 0.5, f) -1)/ np.sqrt(f)
return theta*S, theta*np.outer(S, V)
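# random_minima(a, b) with a = p and b = f returns a point (A*, B*) of the minimizing manifold:
# S is a random sign vector with ||S|| = 1 and theta = sqrt(eta_star), so
#   A* @ B* = theta^2 * V = eta_star * V,   i.e.   p * A* @ B* = eta * (p*f/(p*f + 1-p)) * V,
# matching the commented eta_star above (cf. Proposition 1 referenced below).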
def initialization_epsilon_close(e, a, b):
A= np.random.normal(0, e, size = matrix2_dim)
B = np.random.normal(0, e, size = matrix1_dim)
#normalization = np.sqrt(np.linalg.norm(A)**2 + np.linalg.norm(B)**2)
#A = A/normalization
#B = B/normalization
C, D = random_minima(a,b)
return C + A, D + B
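# initialization_epsilon_close(e, a, b) perturbs that random minimum with independent Gaussian
# noise of standard deviation e, giving an epsilon-close starting point for gradient descent;
# e is the quantity swept over s in the loop below.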
#Since the output dimension is one, we can find an explicit solution; refer to Proposition 1 in the paper.
#for i in range(1):
# Q, R = initialization_epsilon_close(0.01*i)
# print(np.linalg.svd(Q,full_matrices=False))
# print(np.transpose(np.linalg.svd(R,full_matrices=False)[0])[0])
for s in range(3):
#For plotting
e = 0.05 + s*0.05
iter_per_prob = 10 #change this
#iter_per_prob = 3 #change this
#beta = np.zeros((8,iter_per_prob))
#beta_err = np.zeros((8,iter_per_prob))
##parameters GD
#sigma=0.2 #random initialization standard deviation
rate = 0.01 # Learning rate
precision = 0.001 #approximation error
    maxiter = 500000 # maximum number of iterations
#initial_data = 15000 #Start fitting here
#maxiter = 100 # maximum number of iterationss
#initial_data = 1 #Start fitting here
#output_loss =np.zeros((iter_per_prob, maxiter))
for l in range(0,9):
p=(l+2)*0.1
output_loss =np.zeros((iter_per_prob, maxiter))
for j in range(0,iter_per_prob):
start_time = time.time()
## Variable Initialization
i = 0
error = 1
W_2, W_1 = initialization_epsilon_close(e, p, f)
## Data Trackers
Loss_GD = np.array([])
#Gradient Descent
## Procedure
while (i < maxiter) : #and (error > precision):
D_2, D_1 = Det_Gradient_Dropout_Linear_NN_Loss(Data_Matrix, W_2, W_1, p)
W_2, W_1 = W_2 - rate* D_2, W_1 - rate* D_1
error = np.sqrt(la.norm(D_2)**2 + la.norm(D_1)**2)
Loss_GD = np.append(Loss_GD, error) #Det_Droput_Linear_NN_Loss( Data_Matrix, W_2, W_1, p))
if (error < 1e-10):
i=i+1
Loss_GD = np.append(Loss_GD, np.zeros(maxiter - i))
break
#print(Loss[i], error)# MAYBE HERE PUTTING THE NORM OF THE GRADIENT IS BETTER
i = i + 1
#K, l, P = np.linalg.svd(W_2 @ W_1, full_matrices=False)
#eta_star_1 = p*eta*(p*f/(p*f + 1-p)) #Maybe we need to multiply by p, since we have scaled the function.
#print(l - eta_star_1)
#print(V - P)
##Data Fitting
#fit_guess = [0.5, 0.12, 0.1]
#dynamic_initial=int(initial_data/p) #and (error > divided by p):
#total = Loss_GD.shape[0]
#xdata = np.array([0.01*x for x in range(dynamic_initial, total)])
#ydata = Loss_GD[dynamic_initial:total]
output_loss[j] = Loss_GD
#popt, pcov = curve_fit(Tailfun, xdata, ydata, bounds=(0, fit_guess))
#popt[1]/alpha
#beta[l][j]=popt[1]/p
#beta_err[l][j]=np.sqrt(pcov[1,1])/p #Standard deviation needs to be divided by the scaling
#Plot
print('Running, iteration ', j,' of ', iter_per_prob)
print("Runtime: --- %s seconds ---" % (time.time() - start_time))
#
# plt.rcParams['figure.figsize'] = [10, 10]
#
#
# plt.plot(xdata, Tailfun(xdata, *popt), 'g--', label='fit: a=%5.3f, b=%5.10f, c=%5.3f' % tuple(popt))
# plt.plot(xdata, ydata, 'b-', label='data')
# plt.ylabel('Dropout Loss')
# plt.xlabel('Iteration')
# plt.legend()
# plt.show()
print('End of iterations for p=', p, ' e= ', e)
print(output_loss)
print('Saving array...')
output_string = "output_linear_final/output_linear_2_p_{:.1f}_e_{:.2f}.npy".format(p, e)
# save to npy file
save(output_string, output_loss)
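# Each saved array has shape (iter_per_prob, maxiter): one row per repetition, holding the
# gradient-norm trace of that run, zero-padded after early termination.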
#print(beta)
#print(beta_err)
print('END SIMULATION LINEAR COMPARISON DIFFERENT p')
print('\n'*15) #blank lines separating the two experiments in the log
print('START SIMULATION LINEAR COMPARISON DIFFERENT f')
#####################
#####################
#START CHANGING f
#####################
#####################
for s in range(6): #use 6 for normal operation
iter_per_f = 10
e=0.001 + s*0.002 #Change e for different variances
#beta_f = np.zeros((19,iter_per_f))
#beta_err_f = np.zeros((19,iter_per_f))
##parameters GD
#sigma=0.2 #random initialization standard deviation
rate = 0.01 # Learning rate
precision = 0.001 #approximation error
    maxiter = 500000 # maximum number of iterations
#initial_data = 1 #Start fitting here
p=0.7 #Fix p and vary f
for l in range(0,19):
f = (1+l)*2
output_loss = np.zeros((iter_per_f, maxiter))
for j in range(0,iter_per_f):
start_time = time.time() #Time counter
dim_output = 1
dim_hidden = f
            dim_input = Data_Matrix.shape[0] #input dimension equals the number of whitened features
##Intermediate variables
matrix2_dim = (dim_output, dim_hidden)
matrix1_dim = (dim_hidden, dim_input)
#p=0.7
## Variable Initialization
i = 0
error = 1
W_2, W_1 = initialization_epsilon_close(e, p, f)
## Data Trackers
Loss_GD = np.array([])
#Gradient Descent
## Procedure
while (i < maxiter) : #and (error > precision):
D_2, D_1 = Det_Gradient_Dropout_Linear_NN_Loss(Data_Matrix, W_2, W_1, p)
W_2, W_1 = W_2 - rate* D_2, W_1 - rate* D_1
error = np.sqrt(la.norm(D_2)**2 + la.norm(D_1)**2)
Loss_GD = np.append(Loss_GD, error) #Det_Droput_Linear_NN_Loss( Data_Matrix, W_2, W_1, p))
if (error < 1e-10):
i=i+1
Loss_GD = np.append(Loss_GD, np.zeros(maxiter - i))
break
#print(Loss[i], error) #Maybe it is better to output the norm of the gradient of the signal
i = i + 1
##Data Fitting
#fit_guess = [1., 0.2, 0.2]
#dynamic_initial=int(initial_data/p)
#total = Loss_GD.shape[0]
#xdata = np.array([0.01*x for x in range(dynamic_initial, total)]) #Scale axis for better fit
#ydata = Loss_GD[dynamic_initial:total]
#popt, pcov = curve_fit(Tailfun, xdata, ydata, bounds=(0, fit_guess))
#popt[1]/alpha
#beta_f[l][j]=popt[1]/(p*p)
#beta_err_f[l][j]=np.sqrt(pcov[1,1])/(p*p) #Standard deviation needs to be divided by the scaling
##Plot
output_loss[j] = Loss_GD
#Plot
print('Running, iteration ', j,' of ', iter_per_f, '; f=', f)
print("Runtime: --- %s seconds ---" % (time.time() - start_time))
#print('End of iterations for f=', f, ' Sigma= ', sigma)
#print(output_loss,';')
#print(output_loss)
print('Saving array...')
output_string = "output_linear_final/output_linear_3_f_{:.0f}_e_{:.3f}.npy".format(f, e)
# save to npy file
save(output_string, output_loss)
# plt.rcParams['figure.figsize'] = [10, 10]
# plt.plot(xdata, Tailfun(xdata, *popt), 'g--', label='fit: a=%5.3f, b=%5.10f, c=%5.3f' % tuple(popt))
# plt.plot(xdata, ydata, 'b-', label='data')
# plt.ylabel('Dropout Loss')
# plt.xlabel('Iteration')
# plt.legend()
# plt.show()
print('END OF SIMULATION DROPOUT WITH EPSILON-CLOSE INITIALIZATION WITH LINEAR ACTIVATION')
#!/usr/bin/env python3
#SBATCH --partition=mcs.gpu.q
#SBATCH --output=openme.out
#CLUSTER SIMULATION CODE
import numpy as np #import numpy package
import time
from sklearn.decomposition import PCA #import PCA for whitening
from numpy import linalg as la
from scipy.optimize import curve_fit
from numpy import asarray
from numpy import save
#import required keras modules
from keras.models import Sequential
from keras.layers import Dense, Dropout
#import tensorflow and pandas
#from __future__ import absolute_import, division, print_function, unicode_literals
import pandas as pd
import tensorflow as tf
#import tensorflow.transform as tft
#Loading Data
DATA_PATH= "superconduct/train.csv"
Data_Raw = pd.read_csv(DATA_PATH)
X_Raw = Data_Raw.drop(columns=["critical_temp"]).astype('float64')
Y_Raw = Data_Raw["critical_temp"].astype('float64')
#PCA Data Whitening
pca = PCA(whiten=True)
whitened = pca.fit_transform(X_Raw)
Data_Matrix = np.dot(Y_Raw.to_numpy(), whitened)
Data_Matrix = Data_Matrix/la.norm(Data_Matrix) # Normalize Y so that it has unit norm.
print('START SIMULATION DROPOUT WITH LINEAR ACTIVATION')
#Structure constants
#p = 0.5
dim_output = 1
dim_hidden = 20
dim_input = Data_Matrix.shape[0]
#initialization constants
#sigma=0.1
##Intermediate variables
matrix2_dim = (dim_output, dim_hidden)
matrix1_dim = (dim_hidden, dim_input)
#Expected dropout loss for a linear NN
def Det_Droput_Linear_NN_Loss( Y, A, B, p):
C = A @ B
BB = B @ B.transpose()
AA = A.transpose() @ A
return (la.norm( Y - p*C))**2 + p*(1-p)*np.dot( np.diagonal(AA), \
np.diagonal(BB))
def Batch_Stoch_Dropout_Linear_NN_Loss( Y, A, B, p, size):
    fun = 0
    for i in range(size):
        #Bernoulli(p) dropout masks, one per weight entry
        F_A = np.random.binomial(1, p, size= matrix2_dim).astype('float64')
        F_B = np.random.binomial(1, p, size= matrix1_dim).astype('float64')
        fun += la.norm( Y - (A * F_A) @ (B * F_B))**2
    return fun/size
def Det_Gradient_Dropout_Linear_NN_Loss(Y, A, B, p):
C = A @ B
BB = B @ B.transpose()
AA = A.transpose() @ A
Bt=B.transpose()
At=A.transpose()
return -2*(p)*(( Y - p*C) @ Bt) + 2*p*(1-p)*(A @ np.diag(np.diagonal(BB))), \
-2*(p)*(At@( Y - p*C)) + 2*p*(1-p)*( np.diag(np.diagonal(AA)) @ B)
def Batch_Stoch_Gradient_Dropout_Linear_NN_Loss( Y, A, B, p, size):
    Delta_2 = np.zeros(matrix2_dim)
    Delta_1 = np.zeros(matrix1_dim)
    for i in range(size):
        F_A = np.random.binomial(1, p, size= matrix2_dim).astype('float64')
        F_B = np.random.binomial(1, p, size= matrix1_dim).astype('float64')
        Af = A * F_A
        Bf = B * F_B
        Cf = Af @ Bf
        Delta_2 += -2*F_A*(( Y - Cf )@ Bf.transpose())
        Delta_1 += -2*(Af.transpose()@( Y - Cf))*F_B
    return Delta_2/size, Delta_1/size #average of the sampled gradients
# Convergence rate estimator
def Tailfun(x,a,b,c):
return a*np.exp(-(x*b)) + c
def Approximate_linear(x, a,b,c):
return a*(1-b*x) + c
def Rate_Fit(L, alpha): #To approximate the gradient flow, the step size should be one close to convergence
total = L.shape[0]
xdata = np.array([x * 0.1 for x in range(total//5, total)])
ydata = L[total//5:total]
opt, cov = curve_fit(Tailfun, xdata, ydata)
return opt[1]/alpha
#Start Procedure comparison for different p and fixed Sigma
### Linear NN Dropout Training ###
def Initialization_Gaussian(eta):
return np.random.normal(0, eta, size = matrix2_dim), np.random.normal(0, eta, size = matrix1_dim)
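# Gaussian initialization: both weight matrices are drawn i.i.d. from N(0, eta^2); the loop
# below sweeps this standard deviation (sigma = 0.1, 0.2, 0.3).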
for s in range(0,3):
#For plotting
sigma = 0.1 + s*0.1
iter_per_prob = 10
#beta = np.zeros((8,iter_per_prob))
#beta_err = np.zeros((8,iter_per_prob))
##parameters GD
#sigma=0.2 #random initialization standard deviation
rate = 0.01 # Learning rate
precision = 0.001 #approximation error
    maxiter = 500000 # maximum number of iterations
initial_data = 15000 #Start fitting here
#output_loss =np.zeros((iter_per_prob, maxiter))
for l in range(0,8):
p=(l+2)*0.1
output_loss =np.zeros((iter_per_prob, maxiter))
for j in range(0,iter_per_prob):
start_time = time.time()
## Variable Initialization
i = 0
error = 1
W_2, W_1 = Initialization_Gaussian(sigma)
## Data Trackers
Loss_GD = np.array([])
#Gradient Descent
## Procedure
while (i < maxiter) : #and (error > precision):
D_2, D_1 = Det_Gradient_Dropout_Linear_NN_Loss(Data_Matrix, W_2, W_1, p)
W_2, W_1 = W_2 - rate* D_2, W_1 - rate* D_1
error = la.norm(D_2) + la.norm(D_1)
Loss_GD = np.append(Loss_GD, error) #Det_Droput_Linear_NN_Loss( Data_Matrix, W_2, W_1, p))
if (error < 1e-9):
i=i+1
Loss_GD = np.append(Loss_GD, np.zeros(maxiter - i))
break
#Loss_GD = np.append(Loss_GD, error) #Det_Droput_Linear_NN_Loss( Data_Matrix, W_2, W_1, p))
#print(Loss[i], error)# MAYBE HERE PUTTING THE NORM OF THE GRADIENT IS BETTER
i = i + 1
##Data Fitting
#fit_guess = [0.5, 0.12, 0.1]
#dynamic_initial=int(initial_data/p) #and (error > divided by p):
#total = Loss_GD.shape[0]
#xdata = np.array([0.01*x for x in range(dynamic_initial, total)])
#ydata = Loss_GD[dynamic_initial:total]
output_loss[j] = Loss_GD
#popt, pcov = curve_fit(Tailfun, xdata, ydata, bounds=(0, fit_guess))
#popt[1]/alpha
#beta[l][j]=popt[1]/p
#beta_err[l][j]=np.sqrt(pcov[1,1])/p #Standard deviation needs to be divided by the scaling
#Plot
print('Running, iteration ', j,' of ', iter_per_prob)
print("Runtime: --- %s seconds ---" % (time.time() - start_time))
#
# plt.rcParams['figure.figsize'] = [10, 10]
#
#
# plt.plot(xdata, Tailfun(xdata, *popt), 'g--', label='fit: a=%5.3f, b=%5.10f, c=%5.3f' % tuple(popt))
# plt.plot(xdata, ydata, 'b-', label='data')
# plt.ylabel('Dropout Loss')
# plt.xlabel('Iteration')
# plt.legend()
# plt.show()
print('End of iterations for p=', p, ' Sigma= ', sigma)