Commit 42ef1db5 authored by Senen Cerda, A.

Initial commit
#!/usr/bin/env python3
#SBATCH --partition=mcs.gpu.q
#SBATCH --output=openme_linear.out
#CLUSTER SIMULATION CODE
import numpy as np #import numpy package
import time
from sklearn.decomposition import PCA #import PCA for whitening
from numpy import linalg as la
from scipy.optimize import curve_fit
from numpy import asarray
from numpy import save
#import required keras modules
from keras.models import Sequential
from keras.layers import Dense, Dropout
#import tensorflow and pandas
#from __future__ import absolute_import, division, print_function, unicode_literals
import pandas as pd
import tensorflow as tf
#import tensorflow.transform as tft
#Loading Data
DATA_PATH= "superconduct/train.csv"
Data_Raw = pd.read_csv(DATA_PATH)
X_Raw = Data_Raw.drop(columns=["critical_temp"]).astype('float64')
Y_Raw = Data_Raw["critical_temp"].astype('float64')
#PCA Data Whitening
pca = PCA(whiten=True)
whitened = pca.fit_transform(X_Raw)
Data_Matrix = np.dot(Y_Raw.to_numpy(), whitened)
Data_Matrix = Data_Matrix/la.norm(Data_Matrix) #Normalize so that the data vector Y has norm 1.
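#Optional sanity check (illustrative, not part of the original pipeline): after the PCA
#whitening and the projection above, Data_Matrix should be a 1-D vector with unit norm.
assert Data_Matrix.ndim == 1
assert np.isclose(la.norm(Data_Matrix), 1.0)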
print('START SIMULATION DROPOUT WITH LINEAR ACTIVATION WITH GAUSSIAN INITIALIZATION')
#Structure constants
#p = 0.5
dim_output = 1
dim_hidden = 20
dim_input = Data_Matrix.shape[0]
#initialization constants
#sigma=0.1
##Intermediate variables
matrix2_dim = (dim_output, dim_hidden)
matrix1_dim = (dim_hidden, dim_input)
#Expected dropout loss of a linear NN; we take lambda as the only parameter depending on p
def Det_Droput_Linear_NN_Loss(Y, A, B, p):
    C = A @ B
    BB = B @ B.transpose()
    AA = A.transpose() @ A
    return (la.norm(Y - p*C))**2 + p*(1-p)*np.dot(np.diagonal(AA), np.diagonal(BB))
def Batch_Stoch_Dropout_Linear_NN_Loss(Y, A, B, p, size):
    fun = 0
    for i in range(size):
        #F_A and F_B are the weights with independent Bernoulli(p) dropout masks applied
        F_A = np.multiply(A, np.random.binomial(1, p, size=matrix2_dim).astype('float64'))
        F_B = np.multiply(B, np.random.binomial(1, p, size=matrix1_dim).astype('float64'))
        fun += la.norm(Y - F_A @ F_B)**2
    return fun/size
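#Optional sanity check (illustrative): with p=1 no weights are dropped, so the expected
#dropout loss should reduce to the plain squared loss ||Y - AB||^2. The test matrices
#below are arbitrary.
_A_chk = np.random.normal(0, 0.1, size=matrix2_dim)
_B_chk = np.random.normal(0, 0.1, size=matrix1_dim)
assert np.isclose(Det_Droput_Linear_NN_Loss(Data_Matrix, _A_chk, _B_chk, 1.0),
                  la.norm(Data_Matrix - _A_chk @ _B_chk)**2)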
def Det_Gradient_Dropout_Linear_NN_Loss(Y, A, B, p):
    C = A @ B
    BB = B @ B.transpose()
    AA = A.transpose() @ A
    Bt = B.transpose()
    At = A.transpose()
    return (-2*p*((Y - p*C) @ Bt) + 2*p*(1-p)*(A @ np.diag(np.diagonal(BB))),
            -2*p*(At @ (Y - p*C)) + 2*p*(1-p)*(np.diag(np.diagonal(AA)) @ B))
def Batch_Stoch_Gradient_Dropout_Linear_NN_Loss(Y, A, B, p, size):
    Delta_2 = np.zeros(matrix2_dim)
    Delta_1 = np.zeros(matrix1_dim)
    for i in range(size):
        #Sample Bernoulli(p) dropout masks for each weight matrix
        F_A = np.random.binomial(1, p, size=matrix2_dim).astype('float64')
        F_B = np.random.binomial(1, p, size=matrix1_dim).astype('float64')
        Af = A * F_A
        Bf = B * F_B
        Cf = Af @ Bf
        Delta_2 += -2*F_A*((Y - Cf) @ Bf.transpose())
        Delta_1 += -2*(Af.transpose() @ (Y - Cf))*F_B
    return Delta_2/size, Delta_1/size #average over the batch, as in Batch_Stoch_Dropout_Linear_NN_Loss
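#Optional finite-difference check (illustrative): the analytic gradient of the expected
#dropout loss should agree with a central-difference estimate in a single coordinate.
#The dropout probability, step size and test matrices below are arbitrary choices.
_p_chk, _h = 0.6, 1e-6
_A_chk2 = np.random.normal(0, 0.1, size=matrix2_dim)
_B_chk2 = np.random.normal(0, 0.1, size=matrix1_dim)
_G_A, _G_B = Det_Gradient_Dropout_Linear_NN_Loss(Data_Matrix, _A_chk2, _B_chk2, _p_chk)
_E = np.zeros(matrix2_dim)
_E[0, 0] = _h
_fd = (Det_Droput_Linear_NN_Loss(Data_Matrix, _A_chk2 + _E, _B_chk2, _p_chk)
       - Det_Droput_Linear_NN_Loss(Data_Matrix, _A_chk2 - _E, _B_chk2, _p_chk))/(2*_h)
print('Gradient check, dL/dA[0,0] analytic vs central difference:', _G_A[0, 0], _fd)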
#Convergence rate estimator
def Tailfun(x, a, b, c):
    return a*np.exp(-(x*b)) + c
def Approximate_linear(x, a, b, c):
    return a*(1 - b*x) + c
def Rate_Fit(L, alpha): #To approximate the gradient, the step size should be one close to convergence
    total = L.shape[0]
    xdata = np.array([x*0.1 for x in range(total//5, total)])
    ydata = L[total//5:total]
    opt, cov = curve_fit(Tailfun, xdata, ydata)
    return opt[1]/alpha
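#Optional illustration (not part of the original run) of the tail fit used by Rate_Fit and
#by the commented-out fitting blocks below: recover a known decay rate from synthetic data.
#All values and the initial guess are arbitrary.
_x_synth = 0.1*np.arange(100, 400)
_y_synth = 0.8*np.exp(-0.3*_x_synth) + 0.01
_popt_synth, _ = curve_fit(Tailfun, _x_synth, _y_synth, p0=[1.0, 0.2, 0.0])
print('Synthetic tail fit, recovered rate (expected 0.3):', _popt_synth[1])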
print('START SIMULATION DROPOUT WITH EPSILON-CLOSE INITIALIZATION WITH LINEAR ACTIVATION')
#Define the global variables that determine the solution
#U, eta, V = np.linalg.svd(Data_Matrix)
U= np.array([1.0])
eta = 1.0
V = np.array(Data_Matrix)
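#Optional sanity check (illustrative): Data_Matrix is a 1-D vector of unit norm, so its only
#singular value is ||Data_Matrix|| = 1; this is why U, eta and V are written down directly
#above instead of calling np.linalg.svd.
assert np.isclose(eta, la.norm(Data_Matrix))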
#p = 0.7
f = 20
#print(eta)
#print(V)
#eta_star = eta*(p*f/(p*f + 1-p)) #Maybe we need to multiply by p, since we have scaled the function.
#theta = np.sqrt(eta_star)
#CHECK OUT VARIABLES
#print(V)
#Structure constants
#p = 0.5
dim_output = 1
dim_hidden = 20
dim_input = Data_Matrix.shape[0]
#initialization constants
#sigma=0.1
##Intermediate variables
matrix2_dim = (dim_output, dim_hidden)
matrix1_dim = (dim_hidden, dim_input)
#Construct a random global minimum; Gaussian noise is added in initialization_epsilon_close
def random_minima(a, b):
    eta_star = eta*(a*b/(a*b + 1 - a))/a
    theta = np.sqrt(eta_star)
    S = (2*np.random.binomial(1, 0.5, f) - 1)/np.sqrt(f) #random +/-1/sqrt(f) sign vector
    return theta*S, theta*np.outer(S, V)
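#Optional sanity check (illustrative): the sign vector S has unit norm, so the two factors
#returned by random_minima multiply back to eta_star * V. The choice p=0.7 is arbitrary.
_C_chk, _D_chk = random_minima(0.7, f)
assert np.allclose(_C_chk @ _D_chk, (eta*(0.7*f/(0.7*f + 1 - 0.7))/0.7)*V)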
def initialization_epsilon_close(e, a, b):
    A = np.random.normal(0, e, size=matrix2_dim)
    B = np.random.normal(0, e, size=matrix1_dim)
    #normalization = np.sqrt(np.linalg.norm(A)**2 + np.linalg.norm(B)**2)
    #A = A/normalization
    #B = B/normalization
    C, D = random_minima(a, b)
    return C + A, D + B
#Since the output dimension is one, an explicit solution exists; see Proposition 1 in the paper.
#for i in range(1):
# Q, R = initialization_epsilon_close(0.01*i)
# print(np.linalg.svd(Q,full_matrices=False))
# print(np.transpose(np.linalg.svd(R,full_matrices=False)[0])[0])
for s in range(3):
#For plotting
e = 0.05 + s*0.05
iter_per_prob = 10 #change this
#iter_per_prob = 3 #change this
#beta = np.zeros((8,iter_per_prob))
#beta_err = np.zeros((8,iter_per_prob))
##parameters GD
#sigma=0.2 #random initialization standard deviation
rate = 0.01 # Learning rate
precision = 0.001 #approximation error
maxiter = 500000 # maximum number of iterations
#initial_data = 15000 #Start fitting here
#maxiter = 100 # maximum number of iterations
#initial_data = 1 #Start fitting here
#output_loss =np.zeros((iter_per_prob, maxiter))
for l in range(0,9):
p=(l+2)*0.1
output_loss =np.zeros((iter_per_prob, maxiter))
for j in range(0,iter_per_prob):
start_time = time.time()
## Variable Initialization
i = 0
error = 1
W_2, W_1 = initialization_epsilon_close(e, p, f)
## Data Trackers
Loss_GD = np.array([])
#Gradient Descent
## Procedure
while (i < maxiter) : #and (error > precision):
D_2, D_1 = Det_Gradient_Dropout_Linear_NN_Loss(Data_Matrix, W_2, W_1, p)
W_2, W_1 = W_2 - rate* D_2, W_1 - rate* D_1
error = np.sqrt(la.norm(D_2)**2 + la.norm(D_1)**2)
Loss_GD = np.append(Loss_GD, error) #Det_Droput_Linear_NN_Loss( Data_Matrix, W_2, W_1, p))
if (error < 1e-10):
i=i+1
Loss_GD = np.append(Loss_GD, np.zeros(maxiter - i))
break
#print(Loss[i], error)# MAYBE HERE PUTTING THE NORM OF THE GRADIENT IS BETTER
i = i + 1
#K, l, P = np.linalg.svd(W_2 @ W_1, full_matrices=False)
#eta_star_1 = p*eta*(p*f/(p*f + 1-p)) #Maybe we need to multiply by p, since we have scaled the function.
#print(l - eta_star_1)
#print(V - P)
##Data Fitting
#fit_guess = [0.5, 0.12, 0.1]
#dynamic_initial=int(initial_data/p) #and (error > divided by p):
#total = Loss_GD.shape[0]
#xdata = np.array([0.01*x for x in range(dynamic_initial, total)])
#ydata = Loss_GD[dynamic_initial:total]
output_loss[j] = Loss_GD
#popt, pcov = curve_fit(Tailfun, xdata, ydata, bounds=(0, fit_guess))
#popt[1]/alpha
#beta[l][j]=popt[1]/p
#beta_err[l][j]=np.sqrt(pcov[1,1])/p #Standard deviation needs to be divided by the scaling
#Plot
print('Running, iteration ', j,' of ', iter_per_prob)
print("Runtime: --- %s seconds ---" % (time.time() - start_time))
#
# plt.rcParams['figure.figsize'] = [10, 10]
#
#
# plt.plot(xdata, Tailfun(xdata, *popt), 'g--', label='fit: a=%5.3f, b=%5.10f, c=%5.3f' % tuple(popt))
# plt.plot(xdata, ydata, 'b-', label='data')
# plt.ylabel('Dropout Loss')
# plt.xlabel('Iteration')
# plt.legend()
# plt.show()
print('End of iterations for p=', p, ' e= ', e)
print(output_loss)
print('Saving array...')
output_string = "output_linear_final/output_linear_2_p_{:.1f}_e_{:.2f}.npy".format(p, e)
# save to npy file
save(output_string, output_loss)
#print(beta)
#print(beta_err)
print('END SIMULATION LINEAR COMPARISON DIFFERENT p')
print('\n'*15) #blank lines to separate the two experiments in the output log
print('START SIMULATION LINEAR COMPARISON DIFFERENT f')
#####################
#####################
#START CHANGING f
#####################
#####################
for s in range(6): #Add 6 for normal operation
iter_per_f = 10
e=0.001 + s*0.002 #Change e for different variances
#beta_f = np.zeros((19,iter_per_f))
#beta_err_f = np.zeros((19,iter_per_f))
##parameters GD
#sigma=0.2 #random initialization standard deviation
rate = 0.01 # Learning rate
precision = 0.001 #approximation error
maxiter = 500000 # maximum number of iterations
#initial_data = 1 #Start fitting here
p=0.7 #Fix p and vary f
for l in range(0,19):
f = (1+l)*2
output_loss = np.zeros((iter_per_f, maxiter))
for j in range(0,iter_per_f):
start_time = time.time() #Time counter
dim_output = 1
dim_hidden = f
dim_input = Data_Matrix.shape[0] #input is 80
##Intermediate variables
matrix2_dim = (dim_output, dim_hidden)
matrix1_dim = (dim_hidden, dim_input)
#p=0.7
## Variable Initialization
i = 0
error = 1
W_2, W_1 = initialization_epsilon_close(e, p, f)
## Data Trackers
Loss_GD = np.array([])
#Gradient Descent
## Procedure
while (i < maxiter) : #and (error > precision):
D_2, D_1 = Det_Gradient_Dropout_Linear_NN_Loss(Data_Matrix, W_2, W_1, p)
W_2, W_1 = W_2 - rate* D_2, W_1 - rate* D_1
error = np.sqrt(la.norm(D_2)**2 + la.norm(D_1)**2)
Loss_GD = np.append(Loss_GD, error) #Det_Droput_Linear_NN_Loss( Data_Matrix, W_2, W_1, p))
if (error < 1e-10):
i=i+1
Loss_GD = np.append(Loss_GD, np.zeros(maxiter - i))
break
#print(Loss[i], error) #Maybe it is better to output the norm of the gradient of the signal
i = i + 1
##Data Fitting
#fit_guess = [1., 0.2, 0.2]
#dynamic_initial=int(initial_data/p)
#total = Loss_GD.shape[0]
#xdata = np.array([0.01*x for x in range(dynamic_initial, total)]) #Scale axis for better fit
#ydata = Loss_GD[dynamic_initial:total]
#popt, pcov = curve_fit(Tailfun, xdata, ydata, bounds=(0, fit_guess))
#popt[1]/alpha
#beta_f[l][j]=popt[1]/(p*p)
#beta_err_f[l][j]=np.sqrt(pcov[1,1])/(p*p) #Standard deviation needs to be divided by the scaling
##Plot
output_loss[j] = Loss_GD
#Plot
print('Running, iteration ', j,' of ', iter_per_f, '; f=', f)
print("Runtime: --- %s seconds ---" % (time.time() - start_time))
#print('End of iterations for f=', f, ' Sigma= ', sigma)
#print(output_loss,';')
#print(output_loss)
print('Saving array...')
output_string = "output_linear_final/output_linear_3_f_{:.0f}_e_{:.3f}.npy".format(f, e)
# save to npy file
save(output_string, output_loss)
# plt.rcParams['figure.figsize'] = [10, 10]
# plt.plot(xdata, Tailfun(xdata, *popt), 'g--', label='fit: a=%5.3f, b=%5.10f, c=%5.3f' % tuple(popt))
# plt.plot(xdata, ydata, 'b-', label='data')
# plt.ylabel('Dropout Loss')
# plt.xlabel('Iteration')
# plt.legend()
# plt.show()
print('END OF SIMULATION DROPOUT WITH EPSILON-CLOSE INITIALIZATION WITH LINEAR ACTIVATION')
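#The arrays saved above can be reloaded later for the fitting and plotting steps that are
#commented out in this script, e.g. (illustrative, assuming the same directory layout):
#  losses = np.load("output_linear_final/output_linear_2_p_0.5_e_0.10.npy")
#  mean_curve = losses.mean(axis=0)   #average gradient-norm curve over the repetitions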
#!/usr/bin/env python3
#SBATCH --partition=mcs.gpu.q
#SBATCH --output=openme.out
#CLUSTER SIMULATION CODE
import numpy as np #import numpy package
import time
from sklearn.decomposition import PCA #import PCA for whitening
from numpy import linalg as la
from scipy.optimize import curve_fit
from numpy import asarray
from numpy import save
#import required keras modules
from keras.models import Sequential
from keras.layers import Dense, Dropout
#import tensorflow and pandas
#from __future__ import absolute_import, division, print_function, unicode_literals
import pandas as pd
import tensorflow as tf
#import tensorflow.transform as tft
#Loading Data
DATA_PATH= "superconduct/train.csv"
Data_Raw = pd.read_csv(DATA_PATH)
X_Raw = Data_Raw.drop(columns=["critical_temp"]).astype('float64')
Y_Raw = Data_Raw["critical_temp"].astype('float64')
#PCA Data Whitening
pca = PCA(whiten=True)
whitened = pca.fit_transform(X_Raw)
Data_Matrix = np.dot(Y_Raw.to_numpy(), whitened)
Data_Matrix = Data_Matrix/la.norm(Data_Matrix) #Normalize so that the data vector Y has norm 1.
print('START SIMULATION DROPOUT WITH LINEAR ACTIVATION')
#Structure constants
#p = 0.5
dim_output = 1
dim_hidden = 20
dim_input = Data_Matrix.shape[0]
#initialization constants
#sigma=0.1
##Intermediate variables
matrix2_dim = (dim_output, dim_hidden)
matrix1_dim = (dim_hidden, dim_input)
#Expected dropout loss of a linear NN
def Det_Droput_Linear_NN_Loss(Y, A, B, p):
    C = A @ B
    BB = B @ B.transpose()
    AA = A.transpose() @ A
    return (la.norm(Y - p*C))**2 + p*(1-p)*np.dot(np.diagonal(AA), np.diagonal(BB))
def Batch_Stoch_Dropout_Linear_NN_Loss(Y, A, B, p, size):
    fun = 0
    for i in range(size):
        #F_A and F_B are the weights with independent Bernoulli(p) dropout masks applied
        F_A = np.multiply(A, np.random.binomial(1, p, size=matrix2_dim).astype('float64'))
        F_B = np.multiply(B, np.random.binomial(1, p, size=matrix1_dim).astype('float64'))
        fun += la.norm(Y - F_A @ F_B)**2
    return fun/size
def Det_Gradient_Dropout_Linear_NN_Loss(Y, A, B, p):
    C = A @ B
    BB = B @ B.transpose()
    AA = A.transpose() @ A
    Bt = B.transpose()
    At = A.transpose()
    return (-2*p*((Y - p*C) @ Bt) + 2*p*(1-p)*(A @ np.diag(np.diagonal(BB))),
            -2*p*(At @ (Y - p*C)) + 2*p*(1-p)*(np.diag(np.diagonal(AA)) @ B))
def Batch_Stoch_Gradient_Dropout_Linear_NN_Loss(Y, A, B, p, size):
    Delta_2 = np.zeros(matrix2_dim)
    Delta_1 = np.zeros(matrix1_dim)
    for i in range(size):
        #Sample Bernoulli(p) dropout masks for each weight matrix
        F_A = np.random.binomial(1, p, size=matrix2_dim).astype('float64')
        F_B = np.random.binomial(1, p, size=matrix1_dim).astype('float64')
        Af = A * F_A
        Bf = B * F_B
        Cf = Af @ Bf
        Delta_2 += -2*F_A*((Y - Cf) @ Bf.transpose())
        Delta_1 += -2*(Af.transpose() @ (Y - Cf))*F_B
    return Delta_2/size, Delta_1/size #average over the batch, as in Batch_Stoch_Dropout_Linear_NN_Loss
#Convergence rate estimator
def Tailfun(x, a, b, c):
    return a*np.exp(-(x*b)) + c
def Approximate_linear(x, a, b, c):
    return a*(1 - b*x) + c
def Rate_Fit(L, alpha): #To approximate the gradient, the step size should be one close to convergence
    total = L.shape[0]
    xdata = np.array([x*0.1 for x in range(total//5, total)])
    ydata = L[total//5:total]
    opt, cov = curve_fit(Tailfun, xdata, ydata)
    return opt[1]/alpha
#Start Procedure comparison for different p and fixed Sigma
### Linear NN Dropout Training ###
def Initialization_Gaussian(eta):
    return np.random.normal(0, eta, size=matrix2_dim), np.random.normal(0, eta, size=matrix1_dim)
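#Optional sanity check (illustrative): with i.i.d. N(0, sigma^2) entries the Frobenius norm of
#each factor is roughly sigma*sqrt(number of entries); sigma=0.1 below is an arbitrary choice.
_A0, _B0 = Initialization_Gaussian(0.1)
print('Initial Frobenius norms (roughly %.3f and %.3f expected):'
      % (0.1*np.sqrt(np.prod(matrix2_dim)), 0.1*np.sqrt(np.prod(matrix1_dim))),
      la.norm(_A0), la.norm(_B0))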
for s in range(0,3):
#For plotting
sigma = 0.1 + s*0.1
iter_per_prob = 10
#beta = np.zeros((8,iter_per_prob))
#beta_err = np.zeros((8,iter_per_prob))
##parameters GD
#sigma=0.2 #random initialization standard deviation
rate = 0.01 # Learning rate
precision = 0.001 #approximation error
maxiter = 500000 # maximum number of iterations
initial_data = 15000 #Start fitting here
#output_loss =np.zeros((iter_per_prob, maxiter))
for l in range(0,8):
p=(l+2)*0.1
output_loss =np.zeros((iter_per_prob, maxiter))
for j in range(0,iter_per_prob):
start_time = time.time()
## Variable Initialization
i = 0
error = 1
W_2, W_1 = Initialization_Gaussian(sigma)
## Data Trackers
Loss_GD = np.array([])
#Gradient Descent
## Procedure
while (i < maxiter) : #and (error > precision):
D_2, D_1 = Det_Gradient_Dropout_Linear_NN_Loss(Data_Matrix, W_2, W_1, p)
W_2, W_1 = W_2 - rate* D_2, W_1 - rate* D_1
error = la.norm(D_2) + la.norm(D_1)
Loss_GD = np.append(Loss_GD, error) #Det_Droput_Linear_NN_Loss( Data_Matrix, W_2, W_1, p))
if (error < 1e-9):
i=i+1
Loss_GD = np.append(Loss_GD, np.zeros(maxiter - i))
break
#Loss_GD = np.append(Loss_GD, error) #Det_Droput_Linear_NN_Loss( Data_Matrix, W_2, W_1, p))
#print(Loss[i], error)# MAYBE HERE PUTTING THE NORM OF THE GRADIENT IS BETTER
i = i + 1
##Data Fitting
#fit_guess = [0.5, 0.12, 0.1]
#dynamic_initial=int(initial_data/p) #and (error > divided by p):
#total = Loss_GD.shape[0]
#xdata = np.array([0.01*x for x in range(dynamic_initial, total)])
#ydata = Loss_GD[dynamic_initial:total]
output_loss[j] = Loss_GD
#popt, pcov = curve_fit(Tailfun, xdata, ydata, bounds=(0, fit_guess))
#popt[1]/alpha
#beta[l][j]=popt[1]/p
#beta_err[l][j]=np.sqrt(pcov[1,1])/p #Standard deviation needs to be divided by the scaling
#Plot
print('Running, iteration ', j,' of ', iter_per_prob)
print("Runtime: --- %s seconds ---" % (time.time() - start_time))
#
# plt.rcParams['figure.figsize'] = [10, 10]
#
#
# plt.plot(xdata, Tailfun(xdata, *popt), 'g--', label='fit: a=%5.3f, b=%5.10f, c=%5.3f' % tuple(popt))
# plt.plot(xdata, ydata, 'b-', label='data')
# plt.ylabel('Dropout Loss')
# plt.xlabel('Iteration')
# plt.legend()
# plt.show()
print('End of iterations for p=', p, ' Sigma= ', sigma)
#print(output_loss)
print('Saving array...')
output_string = "output_linear_sigma\output_linear_p_{:.1f}_sigma_{:.1f}.npy".format(p, sigma)
# save to npy file
save(output_string, output_loss)
#print(beta)
#print(beta_err)
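#As in the first script, the saved arrays have shape (iter_per_prob, maxiter) and can be
#reloaded later for the commented-out tail fit, e.g. (illustrative):
#  arr = np.load("output_linear_sigma/output_linear_p_0.5_sigma_0.1.npy")
#  print(arr.shape)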