1. 19 Dec, 2018 1 commit
  2. 18 Dec, 2018 1 commit
  3. 17 Dec, 2018 1 commit
  4. 13 Aug, 2018 1 commit
  5. 22 May, 2018 7 commits
  6. 01 Mar, 2018 7 commits
    • Manuel Pégourié-Gonnard's avatar
      aria: fix comment on aria_a function · 366e1b04
      Manuel Pégourié-Gonnard authored
      The new version of the comment has been generated by the following python3
      script, when the first constant is copy-pasted from RFC 5794 2.4.3.
      
       #!/usr/bin/python3
      
      RFC_A = """
            y0  = x3 ^ x4 ^ x6 ^ x8  ^ x9  ^ x13 ^ x14,
            y1  = x2 ^ x5 ^ x7 ^ x8  ^ x9  ^ x12 ^ x15,
            y2  = x1 ^ x4 ^ x6 ^ x10 ^ x11 ^ x12 ^ x15,
            y3  = x0 ^ x5 ^ x7 ^ x10 ^ x11 ^ x13 ^ x14,
            y4  = x0 ^ x2 ^ x5 ^ x8  ^ x11 ^ x14 ^ x15,
            y5  = x1 ^ x3 ^ x4 ^ x9  ^ x10 ^ x14 ^ x15,
            y6  = x0 ^ x2 ^ x7 ^ x9  ^ x10 ^ x12 ^ x13,
            y7  = x1 ^ x3 ^ x6 ^ x8  ^ x11 ^ x12 ^ x13,
            y8  = x0 ^ x1 ^ x4 ^ x7  ^ x10 ^ x13 ^ x15,
            y9  = x0 ^ x1 ^ x5 ^ x6  ^ x11 ^ x12 ^ x14,
            y10 = x2 ^ x3 ^ x5 ^ x6  ^ x8  ^ x13 ^ x15,
            y11 = x2 ^ x3 ^ x4 ^ x7  ^ x9  ^ x12 ^ x14,
            y12 = x1 ^ x2 ^ x6 ^ x7  ^ x9  ^ x11 ^ x12,
            y13 = x0 ^ x3 ^ x6 ^ x7  ^ x8  ^ x10 ^ x13,
            y14 = x0 ^ x3 ^ x4 ^ x5  ^ x9  ^ x11 ^ x14,
            y15 = x1 ^ x2 ^ x4 ^ x5  ^ x8  ^ x10 ^ x15.
      """
      
      matrix = []
      for l in RFC_A.split('\n')[1:-1]:
          rhs = l.split('=')[1][:-1]
          row = tuple(hex(int(t[2:]))[2:] for t in rhs.split('^'))
          matrix.append(row)
      
      out = {}
      out['a'] = tuple(''.join(w) for w in zip(*(matrix[0:4])))
      out['b'] = tuple(''.join(w) for w in zip(*(matrix[4:8])))
      out['c'] = tuple(''.join(w) for w in zip(*(matrix[8:12])))
      out['d'] = tuple(''.join(w) for w in zip(*(matrix[12:])))
      
      out2 = {}
      for o, r in out.items():
          row = list(r)
          for i in range(len(r) - 1):
              w1 = row[i]
              if len(set(w1)) == 2:
                  w2 = row[i+1]
                  nw1 = nw2 = ''
                  for j in range(len(w1)):
                      if w1[j] in nw1:
                          nw1 += w2[j]
                          nw2 += w1[j]
                      else:
                          nw1 += w1[j]
                          nw2 += w2[j]
                  row[i] = nw1
                  row[i+1] = nw2
      
          out2[o] = row
      
      for o in 'abcd':
          print(o,   '=', ' + '.join(out[o]))
          print(' ', '=', ' + '.join(out2[o]))
      366e1b04
    • Manuel Pégourié-Gonnard's avatar
      aria: improve compiler compat by using __asm · 21662148
      Manuel Pégourié-Gonnard authored
      gcc --std=c99 doesn't like the shorter "asm" (this broke all.sh)
      21662148
    • Manuel Pégourié-Gonnard's avatar
      aria: check arm arch version for asm · 2078725f
      Manuel Pégourié-Gonnard authored
      rev and rev16 are only supported from v6 (all profiles) and up.
      
      arm-none-eabi-gcc picks a lower architecture version by default, which means
      before this commit it would fail to build (assembler error) unless you
      manually specified -march=armv6-m -mthumb or similar, which broke all.sh.
      
      Source for version-checking macros:
      - GCC/Clang: use the -E -dM - </dev/null trick
      - armcc5: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0472k/chr1359125007083.html
      - armclang 6: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0774g/chr1383660321827.html
      
      Tested with the following script:
      
       #!/bin/sh
      
      set -eu
      
      ARMCLANG="env ARM_TOOL_VARIANT=ult $ARMC6_BIN_DIR/armclang"
      
      build() {
          echo "$@"
          "$@" -Iinclude -c library/aria.c
      
      }
      
      build arm-none-eabi-gcc
      build arm-none-eabi-gcc -march=armv5
      
      build clang --target=arm-none-eabi
      build clang --target=arm-none-eabi -march=armv5
      
      build armcc
      build armcc --gnu
      build armcc --cpu=5T
      build armcc --cpu=5T --gnu
      
      build $ARMCLANG --target=arm-arm-none-eabi
      
      check_asm() {
          rm -f aria.o
          build "$@"
          arm-none-eabi-objdump -d aria.o | grep rev16
      }
      
      check_asm arm-none-eabi-gcc -march=armv6-m -mthumb
      check_asm arm-none-eabi-gcc -march=armv7-m -mthumb
      check_asm arm-none-eabi-gcc -march=armv8-m.base -mthumb
      
      check_asm arm-none-eabi-gcc -march=armv7-a -mthumb
      check_asm arm-none-eabi-gcc -march=armv8-a -mthumb
      check_asm arm-none-eabi-gcc -march=armv7-a -marm
      check_asm arm-none-eabi-gcc -march=armv8-a -marm
      
      check_asm clang --target=arm-none-eabi -march=armv6-m
      check_asm clang --target=arm-none-eabi -march=armv7-a
      check_asm clang --target=arm-none-eabi -march=armv7-m
      check_asm clang --target=arm-none-eabi -march=armv7-r
      check_asm clang --target=arm-none-eabi -march=armv8-a
      
      check_asm armcc -O0 --cpu=6-M
      check_asm armcc -O0 --cpu=7-M
      check_asm armcc -O0 --cpu=6
      check_asm armcc -O0 --cpu=7-A
      
      check_asm $ARMCLANG --target=arm-arm-none-eabi -march=armv6-m
      check_asm $ARMCLANG --target=arm-arm-none-eabi -march=armv7-a
      check_asm $ARMCLANG --target=arm-arm-none-eabi -march=armv7-m
      check_asm $ARMCLANG --target=arm-arm-none-eabi -march=armv7-r
      check_asm $ARMCLANG --target=arm-arm-none-eabi -march=armv8-a
      check_asm $ARMCLANG --target=arm-arm-none-eabi -march=armv8-m.base
      2078725f
    • Manuel Pégourié-Gonnard's avatar
      aria: more whitespace fixes · 7fc08795
      Manuel Pégourié-Gonnard authored
      7fc08795
    • Manuel Pégourié-Gonnard's avatar
    • Manuel Pégourié-Gonnard's avatar
      f3a46a9b
    • Manuel Pégourié-Gonnard's avatar
      c0bb66f4
  7. 28 Feb, 2018 1 commit
  8. 27 Feb, 2018 17 commits
    • Manuel Pégourié-Gonnard's avatar
      aria: optimize byte perms on Arm · 377b2b62
      Manuel Pégourié-Gonnard authored
      Use specific instructions for moving bytes around in a word. This speeds
      things up, and as a side-effect, slightly lowers code size.
      
      ARIA_P3 and ARIA_P1 are now 1 single-cycle instruction each (those
      instructions are available in all architecture versions starting from v6-M).
      Note: ARIA_P3 was already translated to a single instruction by Clang 3.8 and
      armclang 6.5, but not arm-gcc 5.4 nor armcc 5.06.
      
      ARIA_P2 is already efficiently translated to the minimal number of
      instructions (1 in ARM mode, 2 in thumb mode) by all tested compilers.
      
      Manually compiled and inspected generated code with the following compilers:
      arm-gcc 5.4, clang 3.8, armcc 5.06 (with and without --gnu), armclang 6.5.
      
      Size reduction (arm-none-eabi-gcc -march=armv6-m -mthumb -Os): 5288 -> 5044 B
      
      Effect on execution time of self-tests on a few boards:
      FRDM-K64F   (Cortex-M4):    444 ->  385 us (-13%)
      LPC1768     (Cortex-M3):    488 ->  432 us (-11%)
      FRDM-KL64Z  (Cortex-M0):   1429 -> 1134 us (-20%)
      
      Measured using a config.h with no cipher mode and the following program with
      aria.c and aria.h copy-pasted to the online compiler:
      
       #include "mbed.h"
       #include "aria.h"
      
      int main() {
          Timer t;
          t.start();
          int ret = mbedtls_aria_self_test(0);
          t.stop();
          printf("ret = %d; time = %d us\n", ret, t.read_us());
      }
      377b2b62
    • Manuel Pégourié-Gonnard's avatar
      aria: optimise byte perms on Intel · fb0e4f0d
      Manuel Pégourié-Gonnard authored
      (A similar commit for Arm follows.)
      
      Use specific instructions for moving bytes around in a word. This speeds
      things up, and as a side-effect, slightly lowers code size.
      
      ARIA_P3 (aka reverse byte order) is now 1 instruction on x86, which speeds up
      key schedule. (Clang 3.8 finds this but GCC 5.4 doesn't.)
      
      I couldn't find an Intel equivalent of ARM's rev16 (aka ARIA_P1), so I made it
      two instructions, which is still much better than the code generated with
      the previous mask-shift-or definition, and speeds up en/decryption. (Neither
      Clang 3.8 nor GCC 5.4 find this.)
      
      Before:
      O	aria.o	ins
      s	7976	43,865
      2	10520	37,631
      3	13040	28,146
      
      After:
      O	aria.o	ins
      s	7768	33,497
      2	9816	28,268
      3	11432	20,829
      
      For measurement method, see previous commit:
      "aria: turn macro into static inline function"
      fb0e4f0d
    • Manuel Pégourié-Gonnard's avatar
      aria: define P3 macro · cac5008b
      Manuel Pégourié-Gonnard authored
      This will allow replacing it with an optimised implementation later.
      cac5008b
    • Manuel Pégourié-Gonnard's avatar
      aria: comment implementation of A transform · f205a012
      Manuel Pégourié-Gonnard authored
      The line-by-line comments were generated using the following Python 3 script:
      
       #!/usr/bin/python3
      
      class Atom:
          def __init__(self, val):
              self.v = val
      
          def __str__(self):
              return self.v
      
          def p1(self):
              v = self.v
              return Atom(v[1] + v[0] + v[3] + v[2])
      
          def p2(self):
              v = self.v
              return Atom(v[2] + v[3] + v[0] + v[1])
      
          def __xor__(self, other):
              return Sum(self.tuple() + other.tuple())
      
          def tuple(self):
              return (self,)
      
      class Sum:
          def __init__(self, terms):
              self.t = terms
              assert(type(terms) == tuple)
              for t in terms:
                  assert(type(t) == Atom)
      
          def __str__(self):
              return '+'.join(sorted((str(t) for t in self.t),
                              key=lambda v: int(v, 16)))
      
          def p1(self):
              return Sum(tuple(t.p1() for t in self.t))
      
          def p2(self):
              return Sum(tuple(t.p2() for t in self.t))
      
          def tuple(self):
              return self.t
      
          def __xor__(self, other):
              return Sum(self.t + other.tuple())
      
      class LoggingDict(dict):
          def __setitem__(self, key, val):
              print(key, '=', val)
              dict.__setitem__(self, key, val)
      
          def set(self, key, val):
              dict.__setitem__(self, key, val)
      
      env = LoggingDict()
      
      env.set('ra', Atom('0123'))
      env.set('rb', Atom('4567'))
      env.set('rc', Atom('89ab'))
      env.set('rd', Atom('cdef'))
      env.set('ARIA_P1', lambda x: x.p1())
      env.set('ARIA_P2', lambda x: x.p2())
      
      code = """
      ta  =   rb;
      rb  =   ra;
      ra  =   ARIA_P2( ta );
      tb  =   ARIA_P2( rd );
      rd  =   ARIA_P1( rc );
      rc  =   ARIA_P1( tb );
      ta  ^=  rd;
      tc  =   ARIA_P2( rb );
      ta  =   ARIA_P1( ta ) ^ tc ^ rc;
      tb  ^=  ARIA_P2( rd );
      tc  ^=  ARIA_P1( ra );
      rb  ^=  ta ^ tb;
      tb  =   ARIA_P2( tb ) ^ ta;
      ra  ^=  ARIA_P1( tb );
      ta  =   ARIA_P2( ta );
      rd  ^=  ARIA_P1( ta ) ^ tc;
      tc  =   ARIA_P2( tc );
      rc  ^=  ARIA_P1( tc ) ^ ta;
      """
      
      exec(code, env)
      f205a012
    • Manuel Pégourié-Gonnard's avatar
    • Manuel Pégourié-Gonnard's avatar
      aria: define SLA() as sl(a()) · 64744f88
      Manuel Pégourié-Gonnard authored
      This decreases the size with -Os by nearly 1k while
      not hurting performance too much with -O2 and -O3
      
      Before:
      O	aria.o	ins
      s	8784	41,408
      2	11112	37,001
      3	13096	27,438
      
      After:
      O	aria.o	ins
      s	7976	43,865
      2	10520	37,631
      3	13040	28,146
      
      (See previous commit for measurement details.)
      64744f88
    • Manuel Pégourié-Gonnard's avatar
      aria: turn macro into static inline function · 8c76a948
      Manuel Pégourié-Gonnard authored
      Besides documenting types better and so on, this gives the compiler more room
      to optimise either for size or performance.
      
      Here are some before/after measurements of:
      - size of aria.o in bytes (less is better)
      - instruction count for the selftest function (less is better)
      with various -O flags.
      
      Before:
      O	aria.o	ins
      s	10896	37,256
      2	11176	37,199
      3	12248	27,752
      
      After:
      O	aria.o	ins
      s	8784	41,408
      2	11112	37,001
      3	13096	27,438
      
      The new version allows the compiler to reach smaller size with -Os while
      maintaining (actually slightly improving) performance with -O2 and -O3.
      
      Measurements were done on x86_64 (but since this is mainly about inlining
      code, this should transpose well to other platforms) using the following
      helper program and script, after disabling CBC, CFB and CTR in config.h, in
      order to focus on the core functions.
      
      ==> st.c <==
       #include "mbedtls/aria.h"
      
      int main( void ) {
          return mbedtls_aria_self_test( 0 );
      }
      
      ==> p.sh <==
       #!/bin/sh
      
      set -eu
      
      ccount () {
          (
          valgrind --tool=callgrind --dump-line=no --callgrind-out-file=/dev/null --collect-atstart=no --toggle-collect=main $1
          ) 2>&1 | sed -n -e 's/.*refs: *\([0-9,]*\)/\1/p'
      }
      
      printf "O\taria.o\tins\n"
      for O in s 2 3; do
          GCC="gcc -Wall -Wextra -Werror -Iinclude"
      
          $GCC -O$O -c library/aria.c
          $GCC -O1 st.c aria.o -o st
         ./st
      
          SIZE=$( du -b aria.o | cut -f1 )
          INS=$( ccount ./st )
      
          printf "$O\t$SIZE\t$INS\n"
      done
      8c76a948
    • Manuel Pégourié-Gonnard's avatar
      aria: closer to usual comment style · a41ecdab
      Manuel Pégourié-Gonnard authored
      We're not absolutely consistent in the rest of the library, but we tend to use
      C99-style comments less often.
      
      Change to use C89-style comments everywhere except for end-of-line comments.
      a41ecdab
    • Manuel Pégourié-Gonnard's avatar
      aria: use mbedtls_zeroize() · 56453937
      Manuel Pégourié-Gonnard authored
      56453937
    • Manuel Pégourié-Gonnard's avatar
      c76ceb67
    • Manuel Pégourié-Gonnard's avatar
      9cc89248
    • Manuel Pégourié-Gonnard's avatar
      e1ad7491
    • Manuel Pégourié-Gonnard's avatar
      a6d639e5
    • Markku-Juhani O. Saarinen's avatar
      ARIA init and free · 6ba68d4a
      Markku-Juhani O. Saarinen authored
      6ba68d4a
    • Markku-Juhani O. Saarinen's avatar
      ARIA build integration · 3c0b53b2
      Markku-Juhani O. Saarinen authored
      3c0b53b2
    • Markku-Juhani O. Saarinen's avatar
      259fa60f
    • Markku-Juhani O. Saarinen's avatar
      ARIA cipher implementation · 41efbaab
      Markku-Juhani O. Saarinen authored
      41efbaab