diff --git a/Source/sdc/Castro_sdc.cpp b/Source/sdc/Castro_sdc.cpp
index d82d2764e9..06860f4fd1 100644
--- a/Source/sdc/Castro_sdc.cpp
+++ b/Source/sdc/Castro_sdc.cpp
@@ -133,6 +133,12 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
         const Box& bx = mfi.tilebox();
         const Box& bx1 = mfi.growntilebox(1);
 
+        // this is the starting data
+        Array4<const Real> const& k_new_m_start_arr = (k_new[m_start])->array(mfi);
+
+        // this is where the update will be stored
+        Array4<Real> const& k_new_m_end_arr = (k_new[m_end])->array(mfi);
+
 #ifdef REACTIONS
         // advection + reactions
         if (sdc_order == 2)
@@ -171,8 +177,6 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
 
             }
 
-            auto k_m = (*k_new[m_start]).array(mfi);
-            auto k_n = (*k_new[m_end]).array(mfi);
             auto A_m = (*A_new[m_start]).array(mfi);
             auto A_n = (*A_new[m_end]).array(mfi);
             auto C_arr = C2.array();
@@ -180,7 +184,9 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
             amrex::ParallelFor(bx,
             [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
             {
-                sdc_update_o2(i, j, k, k_m, k_n, A_m, A_n, C_arr, dt_m, sdc_iteration, m_start);
+                sdc_update_o2(i, j, k,
+                              k_new_m_start_arr, k_new_m_end_arr,
+                              A_m, A_n, C_arr, dt_m, sdc_iteration, m_start);
             });
         }
         else
@@ -189,10 +195,7 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
             // fourth order SDC reaction update -- we need to respect the
             // difference between cell-centers and averages
 
-            Array4<const Real> const& k_new_m_start_arr=
-                (k_new[m_start])->array(mfi);
-            Array4<Real> const& k_new_m_end_arr=(k_new[m_end])->array(mfi);
-            Array4<const Real> const& C_source_arr=C_source.array(mfi);
+            Array4<const Real> const& C_source_arr = C_source.array(mfi);
 
             // convert the starting U to cell-centered on a fab-by-fab basis
             // -- including one ghost cell
@@ -264,9 +267,6 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
 
         }
 #else
-        Array4<const Real> const& k_new_m_start_arr=
-            (k_new[m_start])->array(mfi);
-        Array4<Real> const& k_new_m_end_arr=(k_new[m_end])->array(mfi);
         Array4<const Real> const& A_new_arr=(A_new[m_start])->array(mfi);
         Array4<const Real> const& A_old_0_arr=(A_old[0])->array(mfi);
         Array4<const Real> const& A_old_1_arr=(A_old[1])->array(mfi);
@@ -317,6 +317,12 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
         }
 #endif
 
+        amrex::ParallelFor(bx,
+        [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+        {
+            normalize_species_sdc(i, j, k, k_new_m_end_arr);
+        });
+
     }
 }
 
diff --git a/Source/sdc/sdc_newton_solve.H b/Source/sdc/sdc_newton_solve.H
index 500e31092e..ca0bb5d424 100644
--- a/Source/sdc/sdc_newton_solve.H
+++ b/Source/sdc/sdc_newton_solve.H
@@ -109,7 +109,7 @@ sdc_newton_solve(const Real dt_m,
 
     Array1D<Real, 1, NumSpec+1> f;
 
-    const int MAX_ITER = 100;
+    const int MAX_ITER = 200;
 
     ierr = newton::NEWTON_SUCCESS;
 
@@ -251,7 +251,7 @@ sdc_newton_subdivide(const Real dt_m,
     // converges or reaches our limit on the number of
     // subintervals.
 
-    const int MAX_NSUB = 64;
+    const int MAX_NSUB = 128;
     GpuArray<Real, NUM_STATE> U_begin;
 
     // subdivide the timestep and do multiple Newtons. We come
@@ -308,6 +308,10 @@ sdc_newton_subdivide(const Real dt_m,
         }
         nsub *= 2;
     }
+
+    if (ierr != newton::NEWTON_SUCCESS) {  // report the Newton error code on stdout
+        std::cout << "Failed. " << ierr << std::endl;
+    }
 }
 #endif