# Model parameters
β = 0.994 # Discount factor
ν = 1 # Inverse Frisch elasticity of labor supply
ψ = 1 # Normalization parameter for hours worked
θ = 0.75 # Calvo parameter
ϵ = 6 # Implies a steady-state markup of 20%
ϕ_pie = 1.5 # Taylor rule parameter
ϕ_y = 0.25 # Taylor rule parameter
Πss = 1.005 # Inflation target
s_g = 0.2 # Share of government expenditures in output
ρ_b = 0.8 # Discount factor shock persistence
σ_b = 0.025 # Standard deviation of discount factor (preference) shock
Ass = 1 # Steady state of productivity shock
ρ_a = 0.9 # Persistence of TFP shock
σ_a = 0.025 # Standard deviation of productivity shock
σ_m = 0.025 # Standard deviation of MP shock
ρ_g = 0.8 # Government expenditure shock persistence
σ_g = 0.025 # Standard deviation of government expenditure shock
#Steady state
Rss = Πss/β
Pie_starss = ((1 - θ*Πss**(ϵ - 1))/(1 - θ))**(1/(1 - ϵ))
vss = (1 - θ)/(1 - θ*Πss**ϵ)*Pie_starss**(-ϵ)
g_y = s_g
c_y = (1 - s_g)
x_2ss = 1/c_y*Pie_starss/(1 - β*θ*Πss**(ϵ-1))
x_1ss = (ϵ-1)/ϵ*x_2ss
mcss = (1 - s_g)*(1 - β*θ*Πss**(ϵ))*x_1ss
wss = mcss
l_y = vss
l_c = l_y*1/c_y
lss = (wss/ψ*l_c)**(1/(1 + ν))
yss = lss/l_y
css = c_y*yss
Welcome to the TensorFlow Forum!
Which TF/Keras model are you using, and what are the dataset type and shape? The hyperparameters can only be tuned with an understanding of the model objective. Please share minimal reproducible code so we can replicate and understand the issue. Thank you.
Sorry, here is the full code
import tensorflow as tf
from tensorflow.keras import regularizers, initializers # Import the regularizers module
import numpy as np
#from tensorflow.keras.layers import Input, Dense
#from tensorflow.keras.models import Model
from math import sqrt
from matplotlib import pyplot as plt
from tqdm import tqdm as tqdm # tqdm is a nice library to visualize ongoing loops
import datetime
# the following lines are used for type annotations
from typing import Tuple
class Vector: pass
# Model parameters
β = 0.994 # Discount factor
ν = 1 # Inverse Frisch elasticity of labor supply
ψ = 1 # Normalization parameter for hours worked
θ = 0.75 # Calvo parameter
ϵ = 6 # Implies a steady-state markup of 20%
ϕ_pie = 1.5 # Taylor rule parameter
ϕ_y = 0.25 # Taylor rule parameter
Πss = 1.005 # Inflation target
s_g = 0.2 # Share of government expenditures in output
ρ_b = 0.8 # Discount factor shock persistence
σ_b = 0.02 # Standard deviation of discount factor (preference) shock
Ass = 1 # Steady state of productivity shock
ρ_a = 0.9 # Persistence of TFP shock
σ_a = 0.025 # Standard deviation of productivity shock
σ_m = 0.01 # Standard deviation of MP shock
ρ_g = 0.8 # Government expenditure shock persistence
σ_g = 0.02 # Standard deviation of government expenditure shock
#Steady state
Rss = Πss/β
Pie_starss = ((1 - θ*Πss**(ϵ - 1))/(1 - θ))**(1/(1 - ϵ))
vss = (1 - θ)/(1 - θ*Πss**ϵ)*Pie_starss**(-ϵ)
g_y = s_g
c_y = (1 - s_g)
x_2ss = 1/c_y*Pie_starss/(1 - β*θ*Πss**(ϵ-1))
x_1ss = (ϵ-1)/ϵ*x_2ss
mcss = (1 - s_g)*(1 - β*θ*Πss**(ϵ))*x_1ss
wss = mcss
l_y = vss
l_c = l_y*1/c_y
lss = (wss/ψ*l_c)**(1/(1 + ν))
yss = lss/l_y
css = c_y*yss
# Standard deviations for ergodic distributions of exogenous state variables
σ_e_b = σ_b/(1-ρ_b**2)**0.5
σ_e_a = σ_a/(1-ρ_a**2)**0.5
σ_e_m = σ_m
σ_e_g = σ_g/(1-ρ_g**2)**0.5
# bounds for endogenous state variable
vmin = 1
vmax = 1.1
# construction of neural network
layers = [
tf.keras.layers.Dense(32, activation='tanh', input_dim=5, bias_initializer='he_uniform',kernel_initializer=initializers.GlorotUniform()),
tf.keras.layers.Dense(32, activation='tanh'),
tf.keras.layers.Dense(32, activation='tanh'),
tf.keras.layers.Dense(3, activation=tf.keras.activations.linear)
]
model = tf.keras.Sequential(layers)
print(model.summary())
def dr(z: Vector, a: Vector, m: Vector, g: Vector, v: Vector)-> Tuple[Vector, Vector, Vector]:
    # we normalize the exogenous state variables by two standard deviations
    # so that they typically lie between -1 and 1
    z = z/σ_e_b/2
    a = a/σ_e_a/2
    m = m/σ_e_m/2
    g = g/σ_e_g/2
    # we normalize the endogenous state v to be between -1 and 1
vmin = tf.math.reduce_min(v)
vmax = tf.math.reduce_max(v)
if vmax - vmin==0:
v = v
else:
v = (v-vmin)/(vmax-vmin)*2.0-1.0
# we prepare input to the perceptron
s = tf.concat([_e[:,None] for _e in [z,a,m,g, v]], axis=1) # equivalent to np.column_stack
#s = tf.concat([ζ[:, None], a[:, None], m[:, None], g[:, None], v[:, None]], axis=1)
x = model(s) # n x 3 matrix
# Consumption is always positive
muc = tf.exp( x[:,0] )
# We restrict inflation to be between -5.9 and 5.9
pie = 5.9 * tf.tanh(x[:, 1])
    # No restriction on the auxiliary variable
x_1 = tf.exp(x[:,2])
return (muc, pie, x_1)
def Residuals(e_b: Vector, e_a: Vector, e_m: Vector, e_g: Vector, z: Vector, a: Vector, m: Vector, g: Vector, v: Vector):
# all inputs are expected to have the same size n
n = tf.size(a)
# arguments correspond to the values of the states today
muc, pie, x_1 = dr(z, a, m, g, v)
pie = 1 + pie/100
c = tf.exp(z)/muc
x_2 = ϵ/(ϵ-1)*x_1
y = c + s_g*yss*tf.exp(g)
pie_star = ((1 - θ*pie**(ϵ - 1))/(1 - θ))**(1/(1 - ϵ))
vnext = θ*pie**ϵ*v + (1 - θ)*pie_star**(-ϵ)
l = vnext/tf.exp(a)*y
w = ψ*l**ν*c
mc = w/tf.exp(a)
R = tf.maximum(Rss/(Πss**ϕ_pie*yss**ϕ_y)*pie**ϕ_pie*y**ϕ_y*tf.exp(m),1)
# transitions of the exogenous processes
anext = a*ρ_a + e_a
znext = z*ρ_b + e_b
mnext = 0*m + e_m
gnext = g*ρ_g + e_g
    # decision rule evaluated at next period's states
mucnext, pienext, x_1next = dr(znext, anext, mnext, gnext, vnext)
pienext = 1 + pienext/100
cnext = tf.exp(znext)/mucnext
x_2next = ϵ/(ϵ-1)*x_1next
pie_starnext = ((1 - θ*pienext**(ϵ - 1))/(1 - θ))**(1/(1 - ϵ))
R1 = β*mucnext*R/pienext/muc - 1
R2 = (muc*mc*y + θ*β*pienext**ϵ*x_1next)/x_1 - 1
R3 = (pie_star*(y*muc + θ*β*pienext**(ϵ-1)/pie_starnext*x_2next))/x_1 - ϵ/(ϵ - 1)
return (R1, R2, R3)
@tf.function
def Objective(n,z,a,m,g,v): # objective function for DL training
# randomly drawing 1st realization for shocks
e1_b = tf.random.normal(shape=(n,), stddev=σ_b)
e1_a = tf.random.normal(shape=(n,), stddev=σ_a)
e1_m = tf.random.normal(shape=(n,), stddev=σ_m)
e1_g = tf.random.normal(shape=(n,), stddev=σ_g)
# randomly drawing 2nd realization for shocks
e2_b = tf.random.normal(shape=(n,), stddev=σ_b)
e2_a = tf.random.normal(shape=(n,), stddev=σ_a)
e2_m = tf.random.normal(shape=(n,), stddev=σ_m)
e2_g = tf.random.normal(shape=(n,), stddev=σ_g)
# residuals for n random grid points under 2 realizations of shocks
R1_e1, R2_e1, R3_e1 = Residuals(e1_b, e1_a, e1_m, e1_g, z, a, m, g, v)
R1_e2, R2_e2, R3_e2 = Residuals(e2_b, e2_a, e2_m, e2_g, z, a, m, g, v)
    # Manually pin down the decision rule at the deterministic steady state (z = 0, a = 0, m = 0, g = 0, v = vss)
z_specific = tf.constant([0.0])
a_specific = tf.constant([0.0])
m_specific = tf.constant([0.0])
g_specific = tf.constant([0.0])
v_specific = tf.constant([vss])
muc_specific_predicted, pie_specific_predicted, x_1_specific_predicted = dr(z_specific, a_specific, m_specific, g_specific, v_specific)
pie_specific_predicted = 1 + pie_specific_predicted/100
muc_specific_target = 1/css
pie_specific_target = Πss
x_1_specific_target = x_1ss
# construct all-in-one expectation operator
R_squared = R1_e1*R1_e2 + R2_e1*R2_e2 + R3_e1*R3_e2
mse_muc = tf.keras.losses.mean_squared_error(muc_specific_target, muc_specific_predicted)
mse_pie = tf.keras.losses.mean_squared_error(pie_specific_target, pie_specific_predicted)
mse_x1 = tf.keras.losses.mean_squared_error(x_1_specific_target, x_1_specific_predicted)
error = tf.reduce_mean(R_squared) + (mse_muc + mse_pie + mse_x1)
# compute average across n random draws
return error
@tf.function
def training_step(n,z,a,m,g,v):
with tf.GradientTape() as tape:
xx = Objective(n,z,a,m,g,v)
grads = tape.gradient(xx, model.trainable_variables)
optimizer.apply_gradients(zip(grads,model.trainable_variables))
grads_norm = tf.linalg.global_norm(grads)
return xx,grads_norm
def train_me(n, periods, K, threshold):
    v = tf.random.uniform(shape=(n,), minval=vmin, maxval=vmax)
vals = []
for k in tqdm(range(K)):
z = tf.random.normal(shape=(n, periods), stddev=σ_e_b)
a = tf.random.normal(shape=(n, periods), stddev=σ_e_a)
m = tf.random.normal(shape=(n, periods), stddev=σ_e_m)
g = tf.random.normal(shape=(n, periods), stddev=σ_e_g)
for t in range(periods):
muc, pie, x_1 = dr(z[:, t], a[:, t], m[:, t], g[:, t], v)
pie = 1.0 + pie / 100.0
pie_star = ((1 - θ * tf.pow(pie, ϵ - 1)) / (1 - θ))**(1 / (1 - ϵ))
vnext = θ * tf.pow(pie, ϵ) * v + (1 - θ) * tf.pow(pie_star, -ϵ)
# transition of endogenous states
v = vnext
z_final = z[:,periods-1]
a_final = a[:,periods-1]
m_final = m[:,periods-1]
g_final = g[:,periods-1]
v_final = v
initial_weights = model.get_weights()
val, grads_norm = training_step(n, z_final, a_final, m_final, g_final, v_final)
vals.append(val)
final_weights = model.get_weights()
weight_difference = np.sum([(np.linalg.norm(w - w_initial))**2 for w, w_initial in zip(final_weights, initial_weights)])**0.5
        if grads_norm < threshold or weight_difference < 1e-6 or np.isnan(weight_difference):
            print(f"Stopping training: gradient norm ({grads_norm}) or weight update is below the threshold.")
break
print("L2 Norm between existing and new weights:", weight_difference)
return vals, z, a, m, g, v
optimizer = tf.keras.optimizers.Adam()
n = 500
periods = 10
threshold = 1e-5
K = 50000
results, z, a, m, g, v = train_me(n,periods,K,threshold)
Can you clarify what that means?
Since this is a large computation, you need a GPU to perform it faster. Please make sure you have enabled GPU support for TensorFlow on your system, or follow this link TF install for the TensorFlow GPU setup for your OS, checking the hardware/software requirements.
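For reference, a quick check (assuming TensorFlow 2.x) to confirm that TensorFlow actually sees a GPU:

import tensorflow as tf

# List the GPUs visible to TensorFlow; an empty list means training falls back to the CPU
gpus = tf.config.list_physical_devices('GPU')
print("GPUs visible to TensorFlow:", gpus)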
Along with that, you can try fewer training iterations, the 'relu' activation function, and fewer units in the model layers to expedite training, as sketched below.
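As an illustration only (a sketch, not the original specification), a lighter network and a smaller iteration budget could look like this:

model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation='relu', input_dim=5),  # fewer units per layer
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(3, activation='linear')               # same 3 outputs (muc, pie, x_1)
])
K = 5000  # fewer outer iterations than the 50000 used above

Whether the smaller network is accurate enough for the Euler-equation residuals is something you would need to verify against the full specification.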
I have replicated the mentioned code in Google Colab in GPU mode, and it completed the computation in around 10 minutes. (Attaching the replicated gist for your reference.)