diff --git a/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm b/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm
index dc84c30daf5357d853cbea53e9abd77c9e26bae7..39919579f80521ca383cbabff80a639e8cc59a37 100644
--- a/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm
+++ b/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm
@@ -53,7 +53,7 @@ loop
     orr     r6, r6, r7          ; differences of all 4 pixels
                                 ; calculate total sum
     adds    r8, r8, r4          ; add positive differences to sum
-    subs    r8, r8, r5          ; substract negative differences from sum
+    subs    r8, r8, r5          ; subtract negative differences from sum

                                 ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
@@ -77,7 +77,7 @@ loop

                                 ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum

                                 ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
@@ -101,7 +101,7 @@ loop

                                 ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum

                                 ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
@@ -127,7 +127,7 @@ loop

                                 ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum

                                 ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
diff --git a/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm b/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm
index adc353d2006e21d854529ff1e7874ae13ba2408c..915ee499309146a05efd912542ff5d788dff53a3 100644
--- a/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm
+++ b/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm
@@ -51,7 +51,7 @@ loop
     orr     r8, r8, r10         ; differences of all 4 pixels
                                 ; calculate total sum
     add     r4, r4, r6          ; add positive differences to sum
-    sub     r4, r4, r7          ; substract negative differences from sum
+    sub     r4, r4, r7          ; subtract negative differences from sum

                                 ; calculate sse
     uxtb16  r7, r8              ; byte (two pixels) to halfwords
@@ -77,7 +77,7 @@ loop

                                 ; calculate total sum
     add     r4, r4, r6          ; add positive differences to sum
-    sub     r4, r4, r7          ; substract negative differences from sum
+    sub     r4, r4, r7          ; subtract negative differences from sum

                                 ; calculate sse
     uxtb16  r7, r8              ; byte (two pixels) to halfwords
diff --git a/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm b/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
index dd2ce685c8bc7e71198a1ade8fcf06c15d4ab749..3668dc517a90aec07d4d709a924b4aabd81058d9 100644
--- a/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
+++ b/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm
@@ -58,7 +58,7 @@ loop
     orr     r6, r6, r7          ; differences of all 4 pixels
                                 ; calculate total sum
     adds    r8, r8, r4          ; add positive differences to sum
-    subs    r8, r8, r5          ; substract negative differences from sum
+    subs    r8, r8, r5          ; subtract negative differences from sum

                                 ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
@@ -89,7 +89,7 @@ loop

                                 ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum

                                 ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
@@ -120,7 +120,7 @@ loop

                                 ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum

                                 ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
@@ -153,7 +153,7 @@ loop

                                 ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum

                                 ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
diff --git a/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm b/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
index f972d9b5bac4b789e0160e15350539421a0fe7e0..b4e0959d1b387ad06158233efd4b3fc0018121b8 100644
--- a/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
+++ b/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm
@@ -69,7 +69,7 @@ loop
     orr     r6, r6, r7          ; differences of all 4 pixels
                                 ; calculate total sum
     adds    r8, r8, r4          ; add positive differences to sum
-    subs    r8, r8, r5          ; substract negative differences from sum
+    subs    r8, r8, r5          ; subtract negative differences from sum

                                 ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
@@ -111,7 +111,7 @@ loop

                                 ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum

                                 ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
@@ -153,7 +153,7 @@ loop

                                 ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum

                                 ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
@@ -195,7 +195,7 @@ loop

                                 ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum

                                 ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
diff --git a/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm b/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
index f5da9c09eedd4cdbd516782a5c25eb919409c50e..10863e2ec801de65475ad756a794ca9c41c26d36 100644
--- a/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
+++ b/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm
@@ -59,7 +59,7 @@ loop
     orr     r6, r6, r7          ; differences of all 4 pixels
                                 ; calculate total sum
     adds    r8, r8, r4          ; add positive differences to sum
-    subs    r8, r8, r5          ; substract negative differences from sum
+    subs    r8, r8, r5          ; subtract negative differences from sum

                                 ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
@@ -90,7 +90,7 @@ loop

                                 ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum

                                 ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
@@ -121,7 +121,7 @@ loop

                                 ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum

                                 ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
@@ -154,7 +154,7 @@ loop

                                 ; calculate total sum
     add     r8, r8, r4          ; add positive differences to sum
-    sub     r8, r8, r5          ; substract negative differences from sum
+    sub     r8, r8, r5          ; subtract negative differences from sum

                                 ; calculate sse
     uxtb16  r5, r6              ; byte (two pixels) to halfwords
diff --git a/vp8/common/x86/loopfilter_mmx.asm b/vp8/common/x86/loopfilter_mmx.asm
index f388d2476818e4a63fc93d4b89b0b4d8b0b3f652..88a07b9f3fa74d7a5b82a1e5cd625730bd1b218c 100644
--- a/vp8/common/x86/loopfilter_mmx.asm
+++ b/vp8/common/x86/loopfilter_mmx.asm
@@ -527,7 +527,7 @@ sym(vp8_loop_filter_vertical_edge_mmx):
         pxor        mm7, [GLOBAL(t80)]      ; unoffset

         ; mm7 = q1
-        ; tranpose and write back
+        ; transpose and write back
         ; mm1 = 72 62 52 42 32 22 12 02
         ; mm6 = 73 63 53 43 33 23 13 03
         ; mm3 = 74 64 54 44 34 24 14 04
@@ -1289,7 +1289,7 @@ sym(vp8_mbloop_filter_vertical_edge_mmx):
         pxor        mm6, [GLOBAL(t80)]      ; mm6 = 71 61 51 41 31 21 11 01
         pxor        mm3, [GLOBAL(t80)]      ; mm3 = 76 66 56 46 36 26 15 06

-        ; tranpose and write back
+        ; transpose and write back
         movq        mm0, [rdx]              ; mm0 = 70 60 50 40 30 20 10 00
         movq        mm1, mm0                ; mm0 = 70 60 50 40 30 20 10 00

diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index a66753ba806ebecac70c1d675e1e67ca8d64e808..1913abc69b072d23c4e7b610f6f0b9a4dd9e9e0f 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -958,7 +958,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):

         ; start work on filters
         B_FILTER 2
-        ; tranpose and write back - only work on q1, q0, p0, p1
+        ; transpose and write back - only work on q1, q0, p0, p1
         BV_TRANSPOSE

         ; store 16-line result
@@ -1023,7 +1023,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):

         ; start work on filters
         B_FILTER 2
-        ; tranpose and write back - only work on q1, q0, p0, p1
+        ; transpose and write back - only work on q1, q0, p0, p1
         BV_TRANSPOSE

         lea         rdi, [rsi + rax]        ; rdi points to row +1 for indirect addressing
diff --git a/vp9/common/arm/neon/vp9_idct32x32_add_neon.asm b/vp9/common/arm/neon/vp9_idct32x32_add_neon.asm
index 388a7d719838ce2f1079c7660822e4e269f6ca92..72e933eee9672f8722d98a138ef642f5641ef0ac 100644
--- a/vp9/common/arm/neon/vp9_idct32x32_add_neon.asm
+++ b/vp9/common/arm/neon/vp9_idct32x32_add_neon.asm
@@ -72,7 +72,7 @@ cospi_31_64 EQU 804
     ;   reg1 = output[first_offset]
     ;   reg2 = output[second_offset]
     ; for proper address calculation, the last offset used when manipulating
-    ; output, wethere reading or storing) must be passed in. use 0 for first
+    ; output, whether reading or storing) must be passed in. use 0 for first
     ; use.
     MACRO
     LOAD_FROM_OUTPUT $prev_offset, $first_offset, $second_offset, $reg1, $reg2
@@ -88,7 +88,7 @@ cospi_31_64 EQU 804
     ;   output[first_offset] = reg1
     ;   output[second_offset] = reg2
     ; for proper address calculation, the last offset used when manipulating
-    ; output, wethere reading or storing) must be passed in. use 0 for first
+    ; output, whether reading or storing) must be passed in. use 0 for first
     ; use.
     MACRO
     STORE_IN_OUTPUT $prev_offset, $first_offset, $second_offset, $reg1, $reg2
@@ -242,7 +242,7 @@ cospi_31_64 EQU 804
     ; TODO(cd): have special case to re-use constants when they are similar for
     ;           consecutive butterflies
     ; TODO(cd): have special case when both constants are the same, do the
-    ;           additions/substractions before the multiplies.
+    ;           additions/subtractions before the multiplies.
     ; generate the constants
     ;   generate scalar constants
     mov         r8,  #$first_constant  & 0xFF00
@@ -260,7 +260,7 @@ cospi_31_64 EQU 804
     vmull.s16 q11, $regB, d31
     vmull.s16 q12, $regC, d31
     ; (used) five for intermediate (q8-q12), one for constants (q15)
-    ; do some addition/substractions (to get back two register)
+    ; do some addition/subtractions (to get back two register)
     vsub.s32  q8, q8, q10
     vsub.s32  q9, q9, q11
     ; do more multiplications (ordered for maximum latency hiding)
@@ -268,7 +268,7 @@ cospi_31_64 EQU 804
     vmull.s16 q11, $regA, d30
     vmull.s16 q15, $regB, d30
     ; (used) six for intermediate (q8-q12, q15)
-    ; do more addition/substractions
+    ; do more addition/subtractions
     vadd.s32  q11, q12, q11
     vadd.s32  q10, q10, q15
     ; (used) four for intermediate (q8-q11)
diff --git a/vp9/common/x86/vp9_loopfilter_mmx.asm b/vp9/common/x86/vp9_loopfilter_mmx.asm
index a7f69307da257450c44e215f4f8aca8af3861b4c..91055b9f9d49287f9e531439edcda68671d54f90 100644
--- a/vp9/common/x86/vp9_loopfilter_mmx.asm
+++ b/vp9/common/x86/vp9_loopfilter_mmx.asm
@@ -527,7 +527,7 @@ sym(vp9_lpf_vertical_4_mmx):
         pxor        mm7, [GLOBAL(t80)]      ; unoffset

         ; mm7 = q1
-        ; tranpose and write back
+        ; transpose and write back
         ; mm1 = 72 62 52 42 32 22 12 02
         ; mm6 = 73 63 53 43 33 23 13 03
         ; mm3 = 74 64 54 44 34 24 14 04
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c
index a9d168cc8f968801e54313b8c7248ab0767a2805..d5232393f3c24816b8be7aecc77d5f787b0b9928 100644
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -47,7 +47,7 @@ void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) {
   // The 2D transform is done with two passes which are actually pretty
   // similar. In the first one, we transform the columns and transpose
   // the results. In the second one, we transform the rows. To achieve that,
-  // as the first pass results are transposed, we tranpose the columns (that
+  // as the first pass results are transposed, we transpose the columns (that
   // is the transposed rows) and transpose the results (so that it goes back
   // in normal/row positions).
   int pass;
@@ -315,7 +315,7 @@ void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) {
   // The 2D transform is done with two passes which are actually pretty
   // similar. In the first one, we transform the columns and transpose
   // the results. In the second one, we transform the rows. To achieve that,
-  // as the first pass results are transposed, we tranpose the columns (that
+  // as the first pass results are transposed, we transpose the columns (that
   // is the transposed rows) and transpose the results (so that it goes back
   // in normal/row positions).
   int pass;
diff --git a/vp9/encoder/x86/vp9_dct_avx2.c b/vp9/encoder/x86/vp9_dct_avx2.c
index 2b82d97505a3aefbdeb789175738718a3165d09a..b5269ed0303150b0e9ba2298ad1b7365f6492d93 100644
--- a/vp9/encoder/x86/vp9_dct_avx2.c
+++ b/vp9/encoder/x86/vp9_dct_avx2.c
@@ -16,7 +16,7 @@ void vp9_fdct4x4_avx2(const int16_t *input, int16_t *output, int stride) {
   // The 2D transform is done with two passes which are actually pretty
   // similar. In the first one, we transform the columns and transpose
   // the results. In the second one, we transform the rows. To achieve that,
-  // as the first pass results are transposed, we tranpose the columns (that
+  // as the first pass results are transposed, we transpose the columns (that
   // is the transposed rows) and transpose the results (so that it goes back
   // in normal/row positions).
   int pass;
@@ -46,7 +46,7 @@ void vp9_fdct4x4_avx2(const int16_t *input, int16_t *output, int stride) {
   in3 = _mm_slli_epi16(in3, 4);
   // if (i == 0 && input[0]) input[0] += 1;
   {
-    // The mask will only contain wether the first value is zero, all
+    // The mask will only contain whether the first value is zero, all
     // other comparison will fail as something shifted by 4 (above << 4)
     // can never be equal to one. To increment in the non-zero case, we
     // add the mask and one for the first element:
@@ -59,7 +59,7 @@ void vp9_fdct4x4_avx2(const int16_t *input, int16_t *output, int stride) {
   }
   // Do the two transform/transpose passes
   for (pass = 0; pass < 2; ++pass) {
-    // Transform 1/2: Add/substract
+    // Transform 1/2: Add/subtract
     const __m128i r0 = _mm_add_epi16(in0, in3);
     const __m128i r1 = _mm_add_epi16(in1, in2);
     const __m128i r2 = _mm_sub_epi16(in1, in2);
@@ -317,7 +317,7 @@ void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) {
   for (pass = 0; pass < 2; pass++) {
     // To store results of each pass before the transpose.
     __m128i res0, res1, res2, res3, res4, res5, res6, res7;
-    // Add/substract
+    // Add/subtract
     const __m128i q0 = _mm_add_epi16(in0, in7);
     const __m128i q1 = _mm_add_epi16(in1, in6);
     const __m128i q2 = _mm_add_epi16(in2, in5);
@@ -328,7 +328,7 @@ void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) {
     const __m128i q7 = _mm_sub_epi16(in0, in7);
     // Work on first four results
     {
-      // Add/substract
+      // Add/subtract
      const __m128i r0 = _mm_add_epi16(q0, q3);
       const __m128i r1 = _mm_add_epi16(q1, q2);
       const __m128i r2 = _mm_sub_epi16(q1, q2);
@@ -390,7 +390,7 @@ void vp9_fdct8x8_avx2(const int16_t *input, int16_t *output, int stride) {
       // Combine
       const __m128i r0 = _mm_packs_epi32(s0, s1);
       const __m128i r1 = _mm_packs_epi32(s2, s3);
-      // Add/substract
+      // Add/subtract
       const __m128i x0 = _mm_add_epi16(q4, r0);
       const __m128i x1 = _mm_sub_epi16(q4, r0);
       const __m128i x2 = _mm_sub_epi16(q7, r1);
@@ -1071,7 +1071,7 @@ void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
   // The 2D transform is done with two passes which are actually pretty
   // similar. In the first one, we transform the columns and transpose
   // the results. In the second one, we transform the rows. To achieve that,
-  // as the first pass results are transposed, we tranpose the columns (that
+  // as the first pass results are transposed, we transpose the columns (that
   // is the transposed rows) and transpose the results (so that it goes back
   // in normal/row positions).
   int pass;
@@ -1228,7 +1228,7 @@ void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
     }
     // Work on the first eight values; fdct8(input, even_results);
     {
-      // Add/substract
+      // Add/subtract
       const __m128i q0 = _mm_add_epi16(input0, input7);
       const __m128i q1 = _mm_add_epi16(input1, input6);
       const __m128i q2 = _mm_add_epi16(input2, input5);
@@ -1239,7 +1239,7 @@ void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
       const __m128i q7 = _mm_sub_epi16(input0, input7);
       // Work on first four results
       {
-        // Add/substract
+        // Add/subtract
        const __m128i r0 = _mm_add_epi16(q0, q3);
        const __m128i r1 = _mm_add_epi16(q1, q2);
        const __m128i r2 = _mm_sub_epi16(q1, q2);
@@ -1303,7 +1303,7 @@ void vp9_fdct16x16_avx2(const int16_t *input, int16_t *output, int stride) {
        // Combine
        const __m128i r0 = _mm_packs_epi32(s0, s1);
        const __m128i r1 = _mm_packs_epi32(s2, s3);
-        // Add/substract
+        // Add/subtract
        const __m128i x0 = _mm_add_epi16(q4, r0);
        const __m128i x1 = _mm_sub_epi16(q4, r0);
        const __m128i x2 = _mm_sub_epi16(q7, r1);
diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c
index 852cf866763a400246567fb722665a8b693ad832..f3735ebd30420e2922fab739b165699f36430985 100644
--- a/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/vp9/encoder/x86/vp9_dct_sse2.c
@@ -16,7 +16,7 @@ void vp9_fdct4x4_sse2(const int16_t *input, int16_t *output, int stride) {
   // The 2D transform is done with two passes which are actually pretty
   // similar. In the first one, we transform the columns and transpose
   // the results. In the second one, we transform the rows. To achieve that,
-  // as the first pass results are transposed, we tranpose the columns (that
+  // as the first pass results are transposed, we transpose the columns (that
   // is the transposed rows) and transpose the results (so that it goes back
   // in normal/row positions).
   int pass;
@@ -47,7 +47,7 @@ void vp9_fdct4x4_sse2(const int16_t *input, int16_t *output, int stride) {
   in1 = _mm_slli_epi16(in1, 4);
   // if (i == 0 && input[0]) input[0] += 1;
   {
-    // The mask will only contain wether the first value is zero, all
+    // The mask will only contain whether the first value is zero, all
     // other comparison will fail as something shifted by 4 (above << 4)
     // can never be equal to one. To increment in the non-zero case, we
     // add the mask and one for the first element:
@@ -60,7 +60,7 @@ void vp9_fdct4x4_sse2(const int16_t *input, int16_t *output, int stride) {
   }
   // Do the two transform/transpose passes
   for (pass = 0; pass < 2; ++pass) {
-    // Transform 1/2: Add/substract
+    // Transform 1/2: Add/subtract
     const __m128i r0 = _mm_add_epi16(in0, in1);
     const __m128i r1 = _mm_sub_epi16(in0, in1);
     const __m128i r2 = _mm_unpacklo_epi64(r0, r1);
@@ -315,7 +315,7 @@ void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) {
   for (pass = 0; pass < 2; pass++) {
     // To store results of each pass before the transpose.
     __m128i res0, res1, res2, res3, res4, res5, res6, res7;
-    // Add/substract
+    // Add/subtract
     const __m128i q0 = _mm_add_epi16(in0, in7);
     const __m128i q1 = _mm_add_epi16(in1, in6);
     const __m128i q2 = _mm_add_epi16(in2, in5);
@@ -326,7 +326,7 @@ void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) {
     const __m128i q7 = _mm_sub_epi16(in0, in7);
     // Work on first four results
     {
-      // Add/substract
+      // Add/subtract
       const __m128i r0 = _mm_add_epi16(q0, q3);
       const __m128i r1 = _mm_add_epi16(q1, q2);
       const __m128i r2 = _mm_sub_epi16(q1, q2);
@@ -388,7 +388,7 @@ void vp9_fdct8x8_sse2(const int16_t *input, int16_t *output, int stride) {
       // Combine
       const __m128i r0 = _mm_packs_epi32(s0, s1);
       const __m128i r1 = _mm_packs_epi32(s2, s3);
-      // Add/substract
+      // Add/subtract
       const __m128i x0 = _mm_add_epi16(q4, r0);
       const __m128i x1 = _mm_sub_epi16(q4, r0);
       const __m128i x2 = _mm_sub_epi16(q7, r1);
@@ -1069,7 +1069,7 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
   // The 2D transform is done with two passes which are actually pretty
   // similar. In the first one, we transform the columns and transpose
   // the results. In the second one, we transform the rows. To achieve that,
-  // as the first pass results are transposed, we tranpose the columns (that
+  // as the first pass results are transposed, we transpose the columns (that
   // is the transposed rows) and transpose the results (so that it goes back
   // in normal/row positions).
   int pass;
@@ -1226,7 +1226,7 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
     }
     // Work on the first eight values; fdct8(input, even_results);
     {
-      // Add/substract
+      // Add/subtract
       const __m128i q0 = _mm_add_epi16(input0, input7);
       const __m128i q1 = _mm_add_epi16(input1, input6);
       const __m128i q2 = _mm_add_epi16(input2, input5);
@@ -1237,7 +1237,7 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
      const __m128i q7 = _mm_sub_epi16(input0, input7);
       // Work on first four results
       {
-        // Add/substract
+        // Add/subtract
        const __m128i r0 = _mm_add_epi16(q0, q3);
        const __m128i r1 = _mm_add_epi16(q1, q2);
        const __m128i r2 = _mm_sub_epi16(q1, q2);
@@ -1301,7 +1301,7 @@ void vp9_fdct16x16_sse2(const int16_t *input, int16_t *output, int stride) {
        // Combine
        const __m128i r0 = _mm_packs_epi32(s0, s1);
        const __m128i r1 = _mm_packs_epi32(s2, s3);
-        // Add/substract
+        // Add/subtract
        const __m128i x0 = _mm_add_epi16(q4, r0);
        const __m128i x1 = _mm_sub_epi16(q4, r0);
        const __m128i x2 = _mm_sub_epi16(q7, r1);
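
Note for reviewers: the comments fixed in the ARMv6 variance files describe the usual sum/SSE split. Signed pixel differences are accumulated into a running sum (the asm adds the positive differences and subtracts the negative ones to build the same total), squared differences into sse, and the variance is sse - sum^2/N. A minimal scalar sketch of that computation, for reference only; variance_ref is a hypothetical name and is not part of this patch:

#include <stdint.h>

static unsigned int variance_ref(const uint8_t *src, int src_stride,
                                 const uint8_t *ref, int ref_stride,
                                 int w, int h, unsigned int *sse) {
  int i, j;
  int sum = 0;          /* total sum of differences */
  unsigned int sq = 0;  /* sum of squared differences */
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = src[j] - ref[j];
      sum += diff;      /* positive diffs add, negative diffs subtract */
      sq += (unsigned int)(diff * diff);
    }
    src += src_stride;
    ref += ref_stride;
  }
  *sse = sq;
  /* variance = SSE - sum^2 / N, with N = w * h pixels */
  return sq - (unsigned int)(((int64_t)sum * sum) / (w * h));
}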
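
Likewise, the "two passes + transpose" comment corrected in the vp9_fdct4x4_* functions is easiest to see in scalar form: each pass applies the 1-D transform down the columns and writes the result transposed, so the second pass transforms what were originally rows, and the output lands back in row order. A rough sketch under those assumptions; the 1-D stage mirrors the Add/subtract butterfly and cosine rotations from the SIMD code, but the real functions also pre-scale the input and round the final output, which this omits:

#include <stdint.h>

#define DCT_CONST_BITS 14
static const int cospi_8_64 = 15137;   /* cos(8*pi/64)  * 2^14 */
static const int cospi_16_64 = 11585;  /* cos(16*pi/64) * 2^14 */
static const int cospi_24_64 = 6270;   /* cos(24*pi/64) * 2^14 */

static int16_t fdct_round_shift(int input) {
  return (int16_t)((input + (1 << (DCT_CONST_BITS - 1))) >> DCT_CONST_BITS);
}

/* 1-D stage: the Add/subtract butterfly, then the cosine rotations. */
static void fdct4_1d(const int16_t *in, int16_t *out) {
  const int s0 = in[0] + in[3];  /* Add/subtract */
  const int s1 = in[1] + in[2];
  const int s2 = in[1] - in[2];
  const int s3 = in[0] - in[3];
  out[0] = fdct_round_shift((s0 + s1) * cospi_16_64);
  out[2] = fdct_round_shift((s0 - s1) * cospi_16_64);
  out[1] = fdct_round_shift(s2 * cospi_24_64 + s3 * cospi_8_64);
  out[3] = fdct_round_shift(s3 * cospi_24_64 - s2 * cospi_8_64);
}

/* Two passes: transform columns and write the result transposed each
 * time, so the data ends up transformed in both directions and back in
 * normal/row positions. */
static void fdct4x4_two_pass(const int16_t *input, int16_t *output) {
  int16_t temp[16];
  const int16_t *in = input;
  int16_t *out = temp;
  int pass, i, j;
  for (pass = 0; pass < 2; ++pass) {
    for (i = 0; i < 4; ++i) {
      int16_t col[4], res[4];
      for (j = 0; j < 4; ++j) col[j] = in[j * 4 + i];   /* read column i */
      fdct4_1d(col, res);
      for (j = 0; j < 4; ++j) out[i * 4 + j] = res[j];  /* store as row i */
    }
    in = temp;   /* second pass consumes the transposed first-pass result */
    out = output;
  }
}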
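
Finally, the "mask will only contain whether the first value is zero" comment refers to a branchless conditional increment. One way to express the same idea with SSE2 intrinsics; this sketch masks lane 0 explicitly instead of relying on the << 4 pre-scaling the real code uses, so the constants differ from the ones in the file:

#include <emmintrin.h>

/* Branchless "if (input[0]) input[0] += 1" over eight 16-bit lanes. */
static __m128i bump_first_if_nonzero(__m128i v) {
  const __m128i one_lane0 = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
  /* _mm_cmpeq_epi16 yields 0xFFFF in lanes where v == 0; ANDing with
   * one_lane0 leaves a 1 only when lane 0 is zero. */
  const __m128i lane0_is_zero =
      _mm_and_si128(_mm_cmpeq_epi16(v, _mm_setzero_si128()), one_lane0);
  /* lane 0 becomes v + 1 - (v == 0): unchanged when zero, +1 otherwise;
   * all other lanes get +0 - 0 and pass through untouched. */
  return _mm_sub_epi16(_mm_add_epi16(v, one_lane0), lane0_is_zero);
}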