2022年4月 – ページ 2

SDKのライブラリが未定義命令を使っているみたいで64bit幅の型(uint64/double型など)で乗除算を実行すると停止(例外？)してしまうので64bit対応のための乗除算(一部のみ)を作ってみた。

乗算は昔よく使った筆算アルゴリズムを思い出し数十年ぶりに使ってみた。除算はオリジナルのアルゴリズムだ。最初は全てアセンブラでと思ったがドキュメントがない状況で手探りで作るしかなく乗算が完成したところで力尽きてしまった．．．orz

でも、さすがに除算を力任せアルゴリズム(64bit値から何回引けるか)で計算する気にはなれなかったので最大64回のシフトと加減算のみで実行できるアルゴリズムを考えてみた。これならそこそこ速いほうだと思う。

ちなみに同様なアルゴリズムで乗算も可能だ。ブースのアルゴリズムよりひねりが足りないがよりシンプルに書けるはずだし筆算アルゴリズムよりも高速になるデータパターンもあると思うので興味のある人は考えてみて。要点はループ回数を最小化することに尽きる。

【修正履歴】
2022-05-04
divu64u64()がまだバグってたので再修正。

2022-05-03
divu64u64()がまだバグってたので修正。

2022-04-28
divu64u64()が凄まじくバグッてた。というか必要なコードを入れたつもりで入れるのをすっかり忘れてた。(-_-;)

【ライブラリ】
最上位のビット(1)位置を返すSystem::lastBit()を使っていることに注意。JN516x(OpenRISC)固有の命令により実装されている。

/*
  muldiv.h - Multiplication and Division Library for NXP-JN516x

  Copyright (c) 2022 Sasapea's Lab. All rights reserved.

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General
  Public License along with this library; if not, write to the
  Free Software Foundation, Inc., 59 Temple Place, Suite 330,
  Boston, MA  02111-1307  USA
*/
#pragma once

#include <jendefs.h>

// Software multiplication / division helpers for the NXP JN516x.
// All members are static; the class only serves as a namespace.
class MulDiv
{
  public:

    // Returns the full 64-bit product of the two unsigned 32-bit operands.
    static uint64 mulu32u32(uint32 a, uint32 b) __attribute__((noinline,optimize(1)));

    // Unsigned 64-bit division.
    //   n : dividend
    //   d : divisor
    //   r : optional output; when non-null it receives the remainder
    // Returns the quotient n / d.
    // When d == 0 the returned quotient is all ones ((uint64)-1) and, if r is
    // non-null, *r receives n unchanged.
    static uint64 divu64u64(uint64 n, uint64 d, uint64 *r = 0) __attribute__((noinline,optimize(3)));
};

muldiv.h - Multiplication and Division Library for NXP-JN516x

This library is free software; you can redistribute it and/or

modify it under the terms of the GNU Lesser General Public

License as published by the Free Software Foundation; either

version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,

but WITHOUT ANY WARRANTY; without even the implied warranty of

MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General

Public License along with this library; if not, write to the

Free Software Foundation, Inc., 59 Temple Place, Suite 330,

Boston, MA 02111-1307 USA

#pragma once

#include <jendefs.h>

// Software multiplication / division helpers for the NXP JN516x.
// All members are static; the class only serves as a namespace.
class MulDiv
{
  public:

    // Returns the full 64-bit product of the two unsigned 32-bit operands.
    static uint64 mulu32u32(uint32 a, uint32 b) __attribute__((noinline,optimize(1)));

    // Unsigned 64-bit division.
    //   n : dividend
    //   d : divisor
    //   r : optional output; when non-null it receives the remainder
    // Returns the quotient n / d.
    // When d == 0 the returned quotient is all ones ((uint64)-1) and, if r is
    // non-null, *r receives n unchanged.
    static uint64 divu64u64(uint64 n, uint64 d, uint64 *r = 0) __attribute__((noinline,optimize(3)));
};

/*
  muldiv.h - Multiplication and Division Library for NXP-JN516x

  Copyright (c) 2022 Sasapea's Lab. All rights reserved.

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General
  Public License along with this library; if not, write to the
  Free Software Foundation, Inc., 59 Temple Place, Suite 330,
  Boston, MA  02111-1307  USA
*/
#include <muldiv.h>
#include "system.h"

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wreturn-type"

// Multiplies two unsigned 32-bit values into a 64-bit product using the
// schoolbook method: each operand is split into 16-bit halves (aH:aL, bH:bL),
// the four partial products are formed, and the two cross terms are folded
// into the high/low result words.
//
// There is intentionally no C++ return statement (the file silences
// -Wreturn-type around this function): the result is left in r3 (high word)
// and r4 (low word).
//
// NOTE(review): this depends on the toolchain returning uint64 in r3:r4 and on
// the compiler leaving r3-r8 alone between the asm statements; only r5 and r7
// are declared as clobbers - confirm against the ABI / compiler output.
// NOTE(review): instruction meanings below are inferred from the mnemonics
// (b.srli/b.slli as logical shifts, b.addc as add-with-carry from the
// preceding b.add, r0 reading as zero) - confirm against the CPU manual.
uint64 MulDiv::mulu32u32(uint32 a, uint32 b)
{
  asm volatile ("b.mov  r5, %0" :: "r" (a) : "r5");   // r5 = a
  asm volatile ("b.mov  r7, %0" :: "r" (b) : "r7");   // r7 = b
  asm volatile ("b.mov  r6, r5");                     // r6 = a (second copy)
  asm volatile ("b.mov  r8, r7");                     // r8 = b (second copy)
  asm volatile ("b.andi r5, r5, 0xFFFF");             // r5 = aL (low 16 bits of a)
  asm volatile ("b.srli r6, r6, 16");                 // r6 = aH (high 16 bits of a)
  asm volatile ("b.andi r7, r7, 0xFFFF");             // r7 = bL
  asm volatile ("b.srli r8, r8, 16");                 // r8 = bH
  asm volatile ("b.mul  r3, r5, r8");                 // r3 = aL * bH (cross term 1)
  asm volatile ("b.mul  r4, r5, r7");                 // r4 = aL * bL (low term)
  asm volatile ("b.mul  r5, r6, r7");                 // r5 = aH * bL (cross term 2)
  asm volatile ("b.mul  r6, r6, r8");                 // r6 = aH * bH (high term)
  asm volatile ("b.srli r8, r3, 16");                 // r8 = part of cross term 1 that belongs to the high word
  asm volatile ("b.slli r7, r3, 16");                 // r7 = part of cross term 1 that belongs to the low word
  asm volatile ("b.add  r4, r4, r7");                 // low  += r7
  asm volatile ("b.addc r3, r0, r8");                 // high  = r8 (+ carry from the low add, presumably)
  asm volatile ("b.srli r8, r5, 16");                 // same split for cross term 2
  asm volatile ("b.slli r7, r5, 16");
  asm volatile ("b.add  r4, r4, r7");                 // low  += r7
  asm volatile ("b.addc r3, r3, r8");                 // high += r8 (+ carry, presumably)
  asm volatile ("b.add  r3, r3, r6");                 // high += aH * bH
  // return r3(H),r4(L)
}

#pragma GCC diagnostic pop

// Unsigned 64-bit division by shift-and-subtract.
//
//   n : dividend (reduced in place to the remainder)
//   d : divisor
//   r : optional output; when non-null it receives whatever is left in n
//
// Returns the quotient. For d == 0 the quotient is all ones and n is passed
// through to *r untouched.
//
// NOTE(review): System::lastBit64() presumably yields the position of the
// highest set bit; only the difference of the two results is used here.
uint64 MulDiv::divu64u64(uint64 n, uint64 d, uint64 *r)
{
  uint64 quotient;

  if (d == 0)
  {
    // Division by zero: signal with an all-ones quotient.
    quotient = static_cast<uint64>(-1);
  }
  else
  {
    quotient = 0;

    const uint32 topN = System::lastBit64(n);
    const uint32 topD = System::lastBit64(d);

    // Start with the divisor aligned to the dividend's top bit, then walk the
    // alignment down one bit per pass until the dividend drops below d.
    uint32 shift = (topN > topD) ? (topN - topD) : 0;
    while (n >= d)
    {
      const uint64 aligned = d << shift;
      if (aligned <= n)
      {
        n -= aligned;
        quotient |= 1ULL << shift;
      }
      --shift;
    }
  }

  if (r)
    *r = n;

  return quotient;
}

muldiv.h - Multiplication and Division Library for NXP-JN516x

This library is free software; you can redistribute it and/or

modify it under the terms of the GNU Lesser General Public

License as published by the Free Software Foundation; either

version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,

but WITHOUT ANY WARRANTY; without even the implied warranty of

MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General

Public License along with this library; if not, write to the

Free Software Foundation, Inc., 59 Temple Place, Suite 330,

Boston, MA 02111-1307 USA

#include <muldiv.h>

#include "system.h"

#pragma GCC diagnostic push

#pragma GCC diagnostic ignored "-Wreturn-type"

// Multiplies two unsigned 32-bit values into a 64-bit product using the
// schoolbook method: each operand is split into 16-bit halves (aH:aL, bH:bL),
// the four partial products are formed, and the two cross terms are folded
// into the high/low result words.
//
// There is intentionally no C++ return statement (the file silences
// -Wreturn-type around this function): the result is left in r3 (high word)
// and r4 (low word).
//
// NOTE(review): this depends on the toolchain returning uint64 in r3:r4 and on
// the compiler leaving r3-r8 alone between the asm statements; only r5 and r7
// are declared as clobbers - confirm against the ABI / compiler output.
// NOTE(review): instruction meanings below are inferred from the mnemonics
// (b.srli/b.slli as logical shifts, b.addc as add-with-carry from the
// preceding b.add, r0 reading as zero) - confirm against the CPU manual.
uint64 MulDiv::mulu32u32(uint32 a, uint32 b)

{

asm volatile ("b.mov r5, %0" :: "r" (a) : "r5");   // r5 = a

asm volatile ("b.mov r7, %0" :: "r" (b) : "r7");   // r7 = b

asm volatile ("b.mov r6, r5");   // r6 = a (second copy)

asm volatile ("b.mov r8, r7");   // r8 = b (second copy)

asm volatile ("b.andi r5, r5, 0xFFFF");   // r5 = aL (low 16 bits of a)

asm volatile ("b.srli r6, r6, 16");   // r6 = aH (high 16 bits of a)

asm volatile ("b.andi r7, r7, 0xFFFF");   // r7 = bL

asm volatile ("b.srli r8, r8, 16");   // r8 = bH

asm volatile ("b.mul r3, r5, r8");   // r3 = aL * bH (cross term 1)

asm volatile ("b.mul r4, r5, r7");   // r4 = aL * bL (low term)

asm volatile ("b.mul r5, r6, r7");   // r5 = aH * bL (cross term 2)

asm volatile ("b.mul r6, r6, r8");   // r6 = aH * bH (high term)

asm volatile ("b.srli r8, r3, 16");   // r8 = part of cross term 1 that belongs to the high word

asm volatile ("b.slli r7, r3, 16");   // r7 = part of cross term 1 that belongs to the low word

asm volatile ("b.add r4, r4, r7");   // low  += r7

asm volatile ("b.addc r3, r0, r8");   // high  = r8 (+ carry from the low add, presumably)

asm volatile ("b.srli r8, r5, 16");   // same split for cross term 2

asm volatile ("b.slli r7, r5, 16");

asm volatile ("b.add r4, r4, r7");   // low  += r7

asm volatile ("b.addc r3, r3, r8");   // high += r8 (+ carry, presumably)

asm volatile ("b.add r3, r3, r6");   // high += aH * bH

// return r3(H),r4(L)

}

#pragma GCC diagnostic pop

// Unsigned 64-bit division by shift-and-subtract.
//
//   n : dividend (reduced in place to the remainder)
//   d : divisor
//   r : optional output; when non-null it receives the remainder
//
// Returns the quotient n / d. When d == 0 the quotient is all ones
// ((uint64)-1) and, if r is non-null, *r receives n unchanged.
//
// NOTE(review): System::lastBit64() presumably yields the position of the
// highest set bit; only the difference of the two results is used here.
uint64 MulDiv::divu64u64(uint64 n, uint64 d, uint64 *r)
{
  uint64 q = (uint64)-1;   // result reported for division by zero
  if (d)
  {
    q = 0;
    uint32 a = System::lastBit64(n);
    uint32 b = System::lastBit64(d);
    // b becomes the shift that lines the divisor's top bit up with the
    // dividend's top bit; it then steps down one bit per pass. The loop ends
    // as soon as the remaining dividend is smaller than the divisor.
    for (b = (a > b ? a - b : 0); n >= d; --b)
    {
      uint64 m = d << b;
      if (n >= m)
      {
        n -= m;          // subtract the aligned divisor ...
        q |= 1ULL << b;  // ... and record the matching quotient bit
      }
    }
  }
  // The remainder store and the return belong after the loop, not inside it.
  if (r)
    *r = n;
  return q;
}

【プロジェクトのダウンロード】
NXP JN516X (TWELITE) をプログラミングする（Eclipse-CDT+MWSTAGE)

月: 2022年4月

NXP JN516X (TWELITE) をプログラミングする（Multiply and Division)