diff --git a/test/convolve_test.cc b/test/convolve_test.cc
index 6af2abb79f66d94498634035e1d1a66a14a9ae11..2fcb2df9564cdc0ba2bc8bb164b4704f18f76eeb 100644
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -264,7 +264,7 @@ TEST_P(ConvolveTest, CopyHoriz) {
   uint8_t* const out = output();
   DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
 
-  REGISTER_STATE_CHECK(
+  ASM_REGISTER_STATE_CHECK(
       UUT_->h8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
                 Width(), Height()));
 
@@ -281,7 +281,7 @@ TEST_P(ConvolveTest, CopyVert) {
   uint8_t* const out = output();
   DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
 
-  REGISTER_STATE_CHECK(
+  ASM_REGISTER_STATE_CHECK(
       UUT_->v8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
                 Width(), Height()));
 
@@ -298,7 +298,7 @@ TEST_P(ConvolveTest, Copy2D) {
   uint8_t* const out = output();
   DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0};
 
-  REGISTER_STATE_CHECK(
+  ASM_REGISTER_STATE_CHECK(
       UUT_->hv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16,
                  Width(), Height()));
 
@@ -356,17 +356,17 @@ TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
                            Width(), Height());
 
         if (filters == eighttap_smooth || (filter_x && filter_y))
-          REGISTER_STATE_CHECK(
+          ASM_REGISTER_STATE_CHECK(
               UUT_->hv8_(in, kInputStride, out, kOutputStride,
                          filters[filter_x], 16, filters[filter_y], 16,
                          Width(), Height()));
         else if (filter_y)
-          REGISTER_STATE_CHECK(
+          ASM_REGISTER_STATE_CHECK(
               UUT_->v8_(in, kInputStride, out, kOutputStride,
                         kInvalidFilter, 16, filters[filter_y], 16,
                         Width(), Height()));
         else
-          REGISTER_STATE_CHECK(
+          ASM_REGISTER_STATE_CHECK(
               UUT_->h8_(in, kInputStride, out, kOutputStride,
                         filters[filter_x], 16, kInvalidFilter, 16,
                         Width(), Height()));
@@ -414,17 +414,17 @@ TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
                                    Width(), Height());
 
         if (filters == eighttap_smooth || (filter_x && filter_y))
-          REGISTER_STATE_CHECK(
+          ASM_REGISTER_STATE_CHECK(
               UUT_->hv8_avg_(in, kInputStride, out, kOutputStride,
                              filters[filter_x], 16, filters[filter_y], 16,
                              Width(), Height()));
         else if (filter_y)
-          REGISTER_STATE_CHECK(
+          ASM_REGISTER_STATE_CHECK(
               UUT_->v8_avg_(in, kInputStride, out, kOutputStride,
                             filters[filter_x], 16, filters[filter_y], 16,
                             Width(), Height()));
         else
-          REGISTER_STATE_CHECK(
+          ASM_REGISTER_STATE_CHECK(
               UUT_->h8_avg_(in, kInputStride, out, kOutputStride,
                             filters[filter_x], 16, filters[filter_y], 16,
                             Width(), Height()));
@@ -494,9 +494,10 @@ TEST_P(ConvolveTest, ChangeFilterWorks) {
    */
 
   /* Test the horizontal filter. */
-  REGISTER_STATE_CHECK(UUT_->h8_(in, kInputStride, out, kOutputStride,
-                                 kChangeFilters[kInitialSubPelOffset],
-                                 kInputPixelStep, NULL, 0, Width(), Height()));
+  ASM_REGISTER_STATE_CHECK(
+      UUT_->h8_(in, kInputStride, out, kOutputStride,
+                kChangeFilters[kInitialSubPelOffset],
+                kInputPixelStep, NULL, 0, Width(), Height()));
 
   for (int x = 0; x < Width(); ++x) {
     const int kFilterPeriodAdjust = (x >> 3) << 3;
@@ -508,9 +509,10 @@ TEST_P(ConvolveTest, ChangeFilterWorks) {
   }
 
   /* Test the vertical filter. */
-  REGISTER_STATE_CHECK(UUT_->v8_(in, kInputStride, out, kOutputStride,
-                                 NULL, 0, kChangeFilters[kInitialSubPelOffset],
-                                 kInputPixelStep, Width(), Height()));
+  ASM_REGISTER_STATE_CHECK(
+      UUT_->v8_(in, kInputStride, out, kOutputStride,
+                NULL, 0, kChangeFilters[kInitialSubPelOffset],
+                kInputPixelStep, Width(), Height()));
 
   for (int y = 0; y < Height(); ++y) {
     const int kFilterPeriodAdjust = (y >> 3) << 3;
@@ -522,12 +524,11 @@ TEST_P(ConvolveTest, ChangeFilterWorks) {
   }
 
   /* Test the horizontal and vertical filters in combination. */
-  REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
-                                  kChangeFilters[kInitialSubPelOffset],
-                                  kInputPixelStep,
-                                  kChangeFilters[kInitialSubPelOffset],
-                                  kInputPixelStep,
-                                  Width(), Height()));
+  ASM_REGISTER_STATE_CHECK(
+      UUT_->hv8_(in, kInputStride, out, kOutputStride,
+                 kChangeFilters[kInitialSubPelOffset], kInputPixelStep,
+                 kChangeFilters[kInitialSubPelOffset], kInputPixelStep,
+                 Width(), Height()));
 
   for (int y = 0; y < Height(); ++y) {
     const int kFilterPeriodAdjustY = (y >> 3) << 3;
@@ -560,10 +561,10 @@ TEST_P(ConvolveTest, CheckScalingFiltering) {
   for (int frac = 0; frac < 16; ++frac) {
     for (int step = 1; step <= 32; ++step) {
       /* Test the horizontal and vertical filters in combination. */
-      REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
-                                      eighttap[frac], step,
-                                      eighttap[frac], step,
-                                      Width(), Height()));
+      ASM_REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
+                                          eighttap[frac], step,
+                                          eighttap[frac], step,
+                                          Width(), Height()));
 
       CheckGuardBlocks();
 
diff --git a/test/dct16x16_test.cc b/test/dct16x16_test.cc
index e6a20fb4144283aa3f381dab941692567d6a7497..b7c2dcc81fdc0d7d7666a71712fe54f90ae80e86 100644
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -311,9 +311,9 @@ class Trans16x16TestBase {
         test_input_block[j] = src[j] - dst[j];
       }
 
-      REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
-                                      test_temp_block, pitch_));
-      REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
+      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
+                                          test_temp_block, pitch_));
+      ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
 
       for (int j = 0; j < kNumCoeffs; ++j) {
         const uint32_t diff = dst[j] - src[j];
@@ -344,7 +344,7 @@ class Trans16x16TestBase {
         input_block[j] = rnd.Rand8() - rnd.Rand8();
 
       fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
-      REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
+      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
 
       // The minimum quant value is 4.
       for (int j = 0; j < kNumCoeffs; ++j)
@@ -375,8 +375,8 @@ class Trans16x16TestBase {
       }
 
       fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
-      REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
-                                      output_block, pitch_));
+      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
+                                          output_block, pitch_));
 
       // The minimum quant value is 4.
       for (int j = 0; j < kNumCoeffs; ++j) {
@@ -421,7 +421,7 @@ class Trans16x16TestBase {
       for (int j = 1; j < kNumCoeffs; ++j)
         output_ref_block[j] = (output_ref_block[j] / ac_thred) * ac_thred;
       inv_txfm_ref(output_ref_block, ref, pitch_, tx_type_);
-      REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_));
+      ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_));
 
       for (int j = 0; j < kNumCoeffs; ++j)
         EXPECT_EQ(ref[j], dst[j]);
@@ -450,7 +450,7 @@ class Trans16x16TestBase {
       for (int j = 0; j < kNumCoeffs; ++j)
         coeff[j] = round(out_r[j]);
 
-      REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
+      ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
 
       for (int j = 0; j < kNumCoeffs; ++j) {
         const uint32_t diff = dst[j] - src[j];
diff --git a/test/dct32x32_test.cc b/test/dct32x32_test.cc
index 501c69621cfb0d082cbc42b115cdba9101bd1f68..02250a97baf172c720785dc31eea589a94fb193d 100644
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@@ -112,8 +112,8 @@ TEST_P(Trans32x32Test, AccuracyCheck) {
       test_input_block[j] = src[j] - dst[j];
     }
 
-    REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32));
-    REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
+    ASM_REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32));
+    ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
 
     for (int j = 0; j < kNumCoeffs; ++j) {
       const uint32_t diff = dst[j] - src[j];
@@ -150,7 +150,7 @@ TEST_P(Trans32x32Test, CoeffCheck) {
 
     const int stride = 32;
     vp9_fdct32x32_c(input_block, output_ref_block, stride);
-    REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));
+    ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));
 
     if (version_ == 0) {
       for (int j = 0; j < kNumCoeffs; ++j)
@@ -189,7 +189,8 @@ TEST_P(Trans32x32Test, MemCheck) {
 
     const int stride = 32;
     vp9_fdct32x32_c(input_extreme_block, output_ref_block, stride);
-    REGISTER_STATE_CHECK(fwd_txfm_(input_extreme_block, output_block, stride));
+    ASM_REGISTER_STATE_CHECK(
+        fwd_txfm_(input_extreme_block, output_block, stride));
 
     // The minimum quant value is 4.
     for (int j = 0; j < kNumCoeffs; ++j) {
@@ -230,7 +231,7 @@ TEST_P(Trans32x32Test, InverseAccuracy) {
     reference_32x32_dct_2d(in, out_r);
     for (int j = 0; j < kNumCoeffs; ++j)
       coeff[j] = round(out_r[j]);
-    REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
+    ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
     for (int j = 0; j < kNumCoeffs; ++j) {
       const int diff = dst[j] - src[j];
       const int error = diff * diff;
diff --git a/test/decode_test_driver.cc b/test/decode_test_driver.cc
index 8bea4ccf9b19875a548b0139278bb76addfeb359..187a1c7cfb6b33e7a9d9298dac828f5e819905eb 100644
--- a/test/decode_test_driver.cc
+++ b/test/decode_test_driver.cc
@@ -32,7 +32,7 @@ vpx_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size,
                                      void *user_priv) {
   vpx_codec_err_t res_dec;
   InitOnce();
-  REGISTER_STATE_CHECK(
+  API_REGISTER_STATE_CHECK(
       res_dec = vpx_codec_decode(&decoder_,
                                  cxdata, static_cast<unsigned int>(size),
                                  user_priv, 0));
diff --git a/test/encode_test_driver.cc b/test/encode_test_driver.cc
index 709831efe1a1c7ad4d903d95c9fe6fecdc53d706..b30c3e098e7214b8478d3a51141bc708fef0b268 100644
--- a/test/encode_test_driver.cc
+++ b/test/encode_test_driver.cc
@@ -59,7 +59,7 @@ void Encoder::EncodeFrameInternal(const VideoSource &video,
   }
 
   // Encode the frame
-  REGISTER_STATE_CHECK(
+  API_REGISTER_STATE_CHECK(
       res = vpx_codec_encode(&encoder_,
                              video.img(), video.pts(), video.duration(),
                              frame_flags, deadline_));
diff --git a/test/fdct4x4_test.cc b/test/fdct4x4_test.cc
index ec233d3f3658adcbb01a5c05e2c80765c53fe9fa..ab636bf29fb46eedc4c872b319af77d0e1702db3 100644
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -79,9 +79,9 @@ class Trans4x4TestBase {
         test_input_block[j] = src[j] - dst[j];
       }
 
-      REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
-                                      test_temp_block, pitch_));
-      REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
+      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
+                                          test_temp_block, pitch_));
+      ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
 
       for (int j = 0; j < kNumCoeffs; ++j) {
         const uint32_t diff = dst[j] - src[j];
@@ -114,7 +114,7 @@ class Trans4x4TestBase {
         input_block[j] = rnd.Rand8() - rnd.Rand8();
 
       fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
-      REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
+      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
 
       // The minimum quant value is 4.
       for (int j = 0; j < kNumCoeffs; ++j)
@@ -145,8 +145,8 @@ class Trans4x4TestBase {
       }
 
       fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
-      REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
-                                      output_block, pitch_));
+      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
+                                          output_block, pitch_));
 
       // The minimum quant value is 4.
       for (int j = 0; j < kNumCoeffs; ++j) {
@@ -175,7 +175,7 @@ class Trans4x4TestBase {
 
       fwd_txfm_ref(in, coeff, pitch_, tx_type_);
 
-      REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
+      ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
 
       for (int j = 0; j < kNumCoeffs; ++j) {
         const uint32_t diff = dst[j] - src[j];
diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc
index 146aa31c688a8c9777d311d240b8233638a57b36..3e14c249395746f853fa32b91d753c6e4fb8a509 100644
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -68,7 +68,7 @@ class FwdTrans8x8TestBase {
       // Initialize a test block with input range [-255, 255].
       for (int j = 0; j < 64; ++j)
         test_input_block[j] = rnd.Rand8() - rnd.Rand8();
-      REGISTER_STATE_CHECK(
+      ASM_REGISTER_STATE_CHECK(
           RunFwdTxfm(test_input_block, test_output_block, pitch_));
 
       for (int j = 0; j < 64; ++j) {
@@ -97,7 +97,7 @@ class FwdTrans8x8TestBase {
       // Initialize a test block with input range [-15, 15].
       for (int j = 0; j < 64; ++j)
         test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
-      REGISTER_STATE_CHECK(
+      ASM_REGISTER_STATE_CHECK(
           RunFwdTxfm(test_input_block, test_output_block, pitch_));
 
       for (int j = 0; j < 64; ++j) {
@@ -139,7 +139,7 @@ class FwdTrans8x8TestBase {
         test_input_block[j] = src[j] - dst[j];
       }
 
-      REGISTER_STATE_CHECK(
+      ASM_REGISTER_STATE_CHECK(
           RunFwdTxfm(test_input_block, test_temp_block, pitch_));
       for (int j = 0; j < 64; ++j) {
           if (test_temp_block[j] > 0) {
@@ -152,7 +152,7 @@ class FwdTrans8x8TestBase {
             test_temp_block[j] *= 4;
           }
       }
-      REGISTER_STATE_CHECK(
+      ASM_REGISTER_STATE_CHECK(
           RunInvTxfm(test_temp_block, dst, pitch_));
 
       for (int j = 0; j < 64; ++j) {
@@ -202,11 +202,11 @@ class FwdTrans8x8TestBase {
         test_input_block[j] = src[j] - dst[j];
       }
 
-      REGISTER_STATE_CHECK(
+      ASM_REGISTER_STATE_CHECK(
           RunFwdTxfm(test_input_block, test_temp_block, pitch_));
-      REGISTER_STATE_CHECK(
+      ASM_REGISTER_STATE_CHECK(
           fwd_txfm_ref(test_input_block, ref_temp_block, pitch_, tx_type_));
-      REGISTER_STATE_CHECK(
+      ASM_REGISTER_STATE_CHECK(
           RunInvTxfm(test_temp_block, dst, pitch_));
 
       for (int j = 0; j < 64; ++j) {
diff --git a/test/idct_test.cc b/test/idct_test.cc
index 1bbf80a0aec95608d55de2c683e483bd7497bfa2..c7f609d587d6e680609fc38e0058a4710fb46e05 100644
--- a/test/idct_test.cc
+++ b/test/idct_test.cc
@@ -52,7 +52,7 @@ TEST_P(IDCTTest, TestGuardBlocks) {
 TEST_P(IDCTTest, TestAllZeros) {
   int i;
 
-  REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
+  ASM_REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
 
   for (i = 0; i < 256; i++)
     if ((i & 0xF) < 4 && i < 64)
@@ -65,7 +65,7 @@ TEST_P(IDCTTest, TestAllOnes) {
   int i;
 
   input[0] = 4;
-  REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
+  ASM_REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
 
   for (i = 0; i < 256; i++)
     if ((i & 0xF) < 4 && i < 64)
@@ -79,7 +79,7 @@ TEST_P(IDCTTest, TestAddOne) {
 
   for (i = 0; i < 256; i++) predict[i] = i;
   input[0] = 4;
-  REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16));
+  ASM_REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16));
 
   for (i = 0; i < 256; i++)
     if ((i & 0xF) < 4 && i < 64)
@@ -93,7 +93,7 @@ TEST_P(IDCTTest, TestWithData) {
 
   for (i = 0; i < 16; i++) input[i] = i;
 
-  REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
+  ASM_REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
 
   for (i = 0; i < 256; i++)
     if ((i & 0xF) > 3 || i > 63)
diff --git a/test/intrapred_test.cc b/test/intrapred_test.cc
index cefe192fb4434fc5892a5f4528937ea00c8d92f7..d93251dfe39ccceb88b1650ba16825eb18b73883 100644
--- a/test/intrapred_test.cc
+++ b/test/intrapred_test.cc
@@ -261,10 +261,10 @@ class IntraPredYTest
 
   virtual void Predict(MB_PREDICTION_MODE mode) {
     mbptr_->mode_info_context->mbmi.mode = mode;
-    REGISTER_STATE_CHECK(pred_fn_(mbptr_,
-                                  data_ptr_[0] - kStride,
-                                  data_ptr_[0] - 1, kStride,
-                                  data_ptr_[0], kStride));
+    ASM_REGISTER_STATE_CHECK(pred_fn_(mbptr_,
+                                      data_ptr_[0] - kStride,
+                                      data_ptr_[0] - 1, kStride,
+                                      data_ptr_[0], kStride));
   }
 
   intra_pred_y_fn_t pred_fn_;
diff --git a/test/partial_idct_test.cc b/test/partial_idct_test.cc
index f2171b20010c842a57d1d3887bda3c04deac009a..fd24c75607a30b6a9f51963639e8f1baead1fd96 100644
--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
@@ -118,8 +118,8 @@ TEST_P(PartialIDctTest, RunQuantCheck) {
                          = (output_ref_block[j] / 1828) * 1828;
     }
 
-    REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
-    REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block1, dst2, size));
+    ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
+    ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block1, dst2, size));
 
     for (int j = 0; j < block_size; ++j) {
       const int diff = dst1[j] - dst2[j];
@@ -182,8 +182,8 @@ TEST_P(PartialIDctTest, ResultsMatch) {
     memcpy(test_coef_block2, test_coef_block1,
            sizeof(*test_coef_block2) * block_size);
 
-    REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
-    REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block2, dst2, size));
+    ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
+    ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block2, dst2, size));
 
     for (int j = 0; j < block_size; ++j) {
       const int diff = dst1[j] - dst2[j];
diff --git a/test/pp_filter_test.cc b/test/pp_filter_test.cc
index 86c2b0e8d038fe3470822e0b59e0ab286e697091..1144083ad4e8a740837b5f74c6f086494aad23bd 100644
--- a/test/pp_filter_test.cc
+++ b/test/pp_filter_test.cc
@@ -80,8 +80,9 @@ TEST_P(VP8PostProcessingFilterTest, FilterOutputCheck) {
   // Initialize pixels in the output to 99.
   (void)vpx_memset(dst_image, 99, output_size);
 
-  REGISTER_STATE_CHECK(GetParam()(src_image_ptr, dst_image_ptr, input_stride,
-                                  output_stride, block_width, flimits, 16));
+  ASM_REGISTER_STATE_CHECK(
+      GetParam()(src_image_ptr, dst_image_ptr, input_stride,
+                 output_stride, block_width, flimits, 16));
 
   static const uint8_t expected_data[block_height] = {
     4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4
diff --git a/test/register_state_check.h b/test/register_state_check.h
index 1ee149b0c749f618a2eed5f0c1f5d19207c6a1d5..8d4beea5f06006a02ad5df8bca4e3f9bf0e2fde6 100644
--- a/test/register_state_check.h
+++ b/test/register_state_check.h
@@ -13,6 +13,20 @@
 
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+
+// ASM_REGISTER_STATE_CHECK(asm_function)
+//   Minimally validates the environment pre & post function execution. This
+//   variant should be used with assembly functions which are not expected to
+//   fully restore the system state. See platform implementations of
+//   RegisterStateCheck for details.
+//
+// API_REGISTER_STATE_CHECK(api_function)
+//   Performs all the checks done by ASM_REGISTER_STATE_CHECK() and any
+//   additional checks to ensure the environment is in a consistent state pre &
+//   post function execution. This variant should be used with API functions.
+//   See platform implementations of RegisterStateCheckXXX for details.
+//
 
 #if defined(_WIN64)
 
@@ -35,11 +49,6 @@ namespace libvpx_test {
 // Compares the state of xmm[6-15] at construction with their state at
 // destruction. These registers should be preserved by the callee on
 // Windows x64.
-// Usage:
-// {
-//   RegisterStateCheck reg_check;
-//   FunctionToVerify();
-// }
 class RegisterStateCheck {
  public:
   RegisterStateCheck() { initialized_ = StoreRegisters(&pre_context_); }
@@ -75,9 +84,9 @@ class RegisterStateCheck {
   CONTEXT pre_context_;
 };
 
-#define REGISTER_STATE_CHECK(statement) do { \
-  libvpx_test::RegisterStateCheck reg_check; \
-  statement;                               \
+#define ASM_REGISTER_STATE_CHECK(statement) do {  \
+  libvpx_test::RegisterStateCheck reg_check;      \
+  statement;                                      \
 } while (false)
 
 }  // namespace libvpx_test
@@ -85,8 +94,6 @@ class RegisterStateCheck {
 #elif defined(CONFIG_SHARED) && defined(HAVE_NEON_ASM) && defined(CONFIG_VP9) \
       && !CONFIG_SHARED && HAVE_NEON_ASM && CONFIG_VP9
 
-#include "vpx/vpx_integer.h"
-
 extern "C" {
 // Save the d8-d15 registers into store.
 void vp9_push_neon(int64_t *store);
@@ -97,11 +104,6 @@ namespace libvpx_test {
 // Compares the state of d8-d15 at construction with their state at
 // destruction. These registers should be preserved by the callee on
 // arm platform.
-// Usage:
-// {
-//   RegisterStateCheck reg_check;
-//   FunctionToVerify();
-// }
 class RegisterStateCheck {
  public:
   RegisterStateCheck() { initialized_ = StoreRegisters(pre_store_); }
@@ -129,9 +131,9 @@ class RegisterStateCheck {
   int64_t pre_store_[8];
 };
 
-#define REGISTER_STATE_CHECK(statement) do { \
-  libvpx_test::RegisterStateCheck reg_check; \
-  statement;                               \
+#define ASM_REGISTER_STATE_CHECK(statement) do {  \
+  libvpx_test::RegisterStateCheck reg_check;      \
+  statement;                                      \
 } while (false)
 
 }  // namespace libvpx_test
@@ -141,10 +143,54 @@ class RegisterStateCheck {
 namespace libvpx_test {
 
 class RegisterStateCheck {};
-#define REGISTER_STATE_CHECK(statement) statement
+#define ASM_REGISTER_STATE_CHECK(statement) statement
 
 }  // namespace libvpx_test
 
 #endif  // _WIN64
 
+#if ARCH_X86 || ARCH_X86_64
+#if defined(__GNUC__)
+
+namespace libvpx_test {
+
+// Checks the FPU tag word pre/post execution to ensure emms has been called.
+class RegisterStateCheckMMX {
+ public:
+  RegisterStateCheckMMX() {
+    __asm__ volatile("fstenv %0" : "=rm"(pre_fpu_env_));
+  }
+  ~RegisterStateCheckMMX() { EXPECT_TRUE(Check()); }
+
+ private:
+  // Checks the FPU tag word pre/post execution, returning false if not cleared
+  // to 0xffff.
+  bool Check() const {
+    EXPECT_EQ(0xffff, pre_fpu_env_[4])
+        << "FPU was in an inconsistent state prior to call";
+
+    uint16_t post_fpu_env[14];
+    __asm__ volatile("fstenv %0" : "=rm"(post_fpu_env));
+    EXPECT_EQ(0xffff, post_fpu_env[4])
+        << "FPU was left in an inconsistent state after call";
+    return !testing::Test::HasNonfatalFailure();
+  }
+
+  uint16_t pre_fpu_env_[14];
+};
+
+#define API_REGISTER_STATE_CHECK(statement) do {  \
+  libvpx_test::RegisterStateCheckMMX reg_check;   \
+  ASM_REGISTER_STATE_CHECK(statement);            \
+} while (false)
+
+}  // namespace libvpx_test
+
+#endif  // __GNUC__
+#endif  // ARCH_X86 || ARCH_X86_64
+
+#ifndef API_REGISTER_STATE_CHECK
+#define API_REGISTER_STATE_CHECK ASM_REGISTER_STATE_CHECK
+#endif
+
 #endif  // TEST_REGISTER_STATE_CHECK_H_
diff --git a/test/sad_test.cc b/test/sad_test.cc
index 89d8c415206f05a8c85eba6c6ea8d5349741579e..7e5bcef33e1cc69b94f8310b0a038f4ddbac38ae 100644
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -149,9 +149,9 @@ class SADx4Test
     const uint8_t* refs[] = {GetReference(0), GetReference(1),
                              GetReference(2), GetReference(3)};
 
-    REGISTER_STATE_CHECK(GET_PARAM(2)(source_data_, source_stride_,
-                                      refs, reference_stride_,
-                                      results));
+    ASM_REGISTER_STATE_CHECK(GET_PARAM(2)(source_data_, source_stride_,
+                                          refs, reference_stride_,
+                                          results));
   }
 
   void CheckSADs() {
@@ -178,9 +178,9 @@ class SADTest
     unsigned int ret;
     const uint8_t* const reference = GetReference(block_idx);
 
-    REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
-                                            reference, reference_stride_,
-                                            max_sad));
+    ASM_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
+                                                reference, reference_stride_,
+                                                max_sad));
     return ret;
   }
 
@@ -210,8 +210,8 @@ class SADVP9Test
     unsigned int ret;
     const uint8_t* const reference = GetReference(block_idx);
 
-    REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
-                                            reference, reference_stride_));
+    ASM_REGISTER_STATE_CHECK(ret = GET_PARAM(2)(source_data_, source_stride_,
+                                                reference, reference_stride_));
     return ret;
   }
 
diff --git a/test/sixtap_predict_test.cc b/test/sixtap_predict_test.cc
index 0c600f402e93a4645069c8f0d977b2d7b453f793..ac7aa99472a65b5dff94da6dd95f93c054470e35 100644
--- a/test/sixtap_predict_test.cc
+++ b/test/sixtap_predict_test.cc
@@ -143,8 +143,9 @@ TEST_P(SixtapPredictTest, TestWithPresetData) {
 
   uint8_t *src = const_cast<uint8_t*>(test_data);
 
-  REGISTER_STATE_CHECK(sixtap_predict_(&src[kSrcStride * 2 + 2 + 1], kSrcStride,
-                                       2, 2, dst_, kDstStride));
+  ASM_REGISTER_STATE_CHECK(
+      sixtap_predict_(&src[kSrcStride * 2 + 2 + 1], kSrcStride,
+                      2, 2, dst_, kDstStride));
 
   for (int i = 0; i < height_; ++i)
     for (int j = 0; j < width_; ++j)
@@ -169,7 +170,7 @@ TEST_P(SixtapPredictTest, TestWithRandomData) {
                                 xoffset, yoffset, dst_c_, kDstStride);
 
       // Run test.
-      REGISTER_STATE_CHECK(
+      ASM_REGISTER_STATE_CHECK(
           sixtap_predict_(&src_[kSrcStride * 2 + 2 + 1], kSrcStride,
                           xoffset, yoffset, dst_, kDstStride));
 
diff --git a/test/subtract_test.cc b/test/subtract_test.cc
index 63e999d6e7b73f1ab5c4501bd2670585705e1e62..2db3dd785dbe0cc7c318c288fcfd79d0b7e72a41 100644
--- a/test/subtract_test.cc
+++ b/test/subtract_test.cc
@@ -82,7 +82,7 @@ TEST_P(SubtractBlockTest, SimpleSubtract) {
       predictor += kDiffPredStride;
     }
 
-    REGISTER_STATE_CHECK(GetParam()(&be, &bd, kDiffPredStride));
+    ASM_REGISTER_STATE_CHECK(GetParam()(&be, &bd, kDiffPredStride));
 
     base_src = *be.base_src;
     src_diff = be.src_diff;
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 9985695163adf2a09f6eba763673f3bc394728e1..5469770690a4952057be60a9ada642487fdcbe60 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -124,7 +124,8 @@ void VarianceTest<VarianceFunctionType>::ZeroTest() {
       memset(ref_, j, block_size_);
       unsigned int sse;
       unsigned int var;
-      REGISTER_STATE_CHECK(var = variance_(src_, width_, ref_, width_, &sse));
+      ASM_REGISTER_STATE_CHECK(
+          var = variance_(src_, width_, ref_, width_, &sse));
       EXPECT_EQ(0u, var) << "src values: " << i << "ref values: " << j;
     }
   }
@@ -139,7 +140,8 @@ void VarianceTest<VarianceFunctionType>::RefTest() {
     }
     unsigned int sse1, sse2;
     unsigned int var1;
-    REGISTER_STATE_CHECK(var1 = variance_(src_, width_, ref_, width_, &sse1));
+    ASM_REGISTER_STATE_CHECK(
+        var1 = variance_(src_, width_, ref_, width_, &sse1));
     const unsigned int var2 = variance_ref(src_, ref_, log2width_,
                                            log2height_, &sse2);
     EXPECT_EQ(sse1, sse2);
@@ -155,7 +157,7 @@ void VarianceTest<VarianceFunctionType>::OneQuarterTest() {
   memset(ref_ + half, 0, half);
   unsigned int sse;
   unsigned int var;
-  REGISTER_STATE_CHECK(var = variance_(src_, width_, ref_, width_, &sse));
+  ASM_REGISTER_STATE_CHECK(var = variance_(src_, width_, ref_, width_, &sse));
   const unsigned int expected = block_size_ * 255 * 255 / 4;
   EXPECT_EQ(expected, var);
 }
@@ -246,8 +248,8 @@ void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
       }
       unsigned int sse1, sse2;
       unsigned int var1;
-      REGISTER_STATE_CHECK(var1 = subpel_variance_(ref_, width_ + 1, x, y,
-                                                   src_, width_, &sse1));
+      ASM_REGISTER_STATE_CHECK(var1 = subpel_variance_(ref_, width_ + 1, x, y,
+                                                       src_, width_, &sse1));
       const unsigned int var2 = subpel_variance_ref(ref_, src_, log2width_,
                                                     log2height_, x, y, &sse2);
       EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
@@ -269,8 +271,9 @@ void SubpelVarianceTest<vp9_subp_avg_variance_fn_t>::RefTest() {
       }
       unsigned int sse1, sse2;
       unsigned int var1;
-      REGISTER_STATE_CHECK(var1 = subpel_variance_(ref_, width_ + 1, x, y,
-                                                   src_, width_, &sse1, sec_));
+      ASM_REGISTER_STATE_CHECK(
+          var1 = subpel_variance_(ref_, width_ + 1, x, y,
+                                  src_, width_, &sse1, sec_));
       const unsigned int var2 = subpel_avg_variance_ref(ref_, src_, sec_,
                                                         log2width_, log2height_,
                                                         x, y, &sse2);
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 29fea616bc89de325fa246ce3f788632fee3cdd1..1d763b6bfae2c9149b7fe0af2621fe6c0043b380 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -386,6 +386,7 @@ int vp8dx_receive_compressed_data(VP8D_COMP *pbi, size_t size,
 
 decode_exit:
     pbi->common.error.setjmp = 0;
+    vp8_clear_system_state();
     return retcode;
 }
 int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags)
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 09854a544fa4266a3d426cdc70e4c844dc0fc9df..373dbebd962c9c90e285a9b50ca89afb8d27141b 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -4886,6 +4886,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
     if (setjmp(cpi->common.error.jmp))
     {
         cpi->common.error.setjmp = 0;
+        vp8_clear_system_state();
         return VPX_CODEC_CORRUPT_FRAME;
     }
 
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 56394fb1ca81f57c7a01049719bcaff9ad1b96d2..b695ddc8d4fc9ccf1f45a3cfa55f3a38223e32c5 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -409,6 +409,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t  *ctx,
                 if (setjmp(pbi->common.error.jmp))
                 {
                     pbi->common.error.setjmp = 0;
+                    vp8_clear_system_state();
                     /* same return value as used in vp8dx_receive_compressed_data */
                     return -1;
                 }
diff --git a/vp9/decoder/vp9_decoder.c b/vp9/decoder/vp9_decoder.c
index a1a78a9471101ee058bfa54b0fee2c288e459d63..e32637b06fc1635c5e8858b7b871497b96727bf4 100644
--- a/vp9/decoder/vp9_decoder.c
+++ b/vp9/decoder/vp9_decoder.c
@@ -246,6 +246,7 @@ int vp9_receive_compressed_data(VP9Decoder *pbi,
 
   if (setjmp(cm->error.jmp)) {
     cm->error.setjmp = 0;
+    vp9_clear_system_state();
 
     // We do not know if the missing frame(s) was supposed to update
     // any of the reference buffers, but we act conservative and