M7350v1_en_gpl

This commit is contained in:
T
2024-09-09 08:52:07 +00:00
commit f9cc65cfda
65988 changed files with 26357421 additions and 0 deletions
@@ -0,0 +1,438 @@
/* libs/pixelflinger/codeflinger/ARMAssembler.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
#define LOG_TAG "ARMAssembler"
#include <stdio.h>
#include <stdlib.h>
#include <cutils/log.h>
#include <cutils/properties.h>
#if defined(WITH_LIB_HARDWARE)
#include <hardware_legacy/qemu_tracing.h>
#endif
#include <private/pixelflinger/ggl_context.h>
#include "codeflinger/ARMAssembler.h"
#include "codeflinger/CodeCache.h"
#include "codeflinger/disassem.h"
// ----------------------------------------------------------------------------
namespace android {
// ----------------------------------------------------------------------------
#if 0
#pragma mark -
#pragma mark ARMAssembler...
#endif
// Binds the assembler to the code buffer owned by `assembly`, positions the
// write cursor at the start of that buffer and records the start time that
// generate() later uses to report how long code generation took.
//
// mPrologPC is explicitly cleared: it is only assigned by prolog(), and
// epilog() writes through it. Leaving it uninitialized would turn an
// epilog() call without a preceding prolog() into a write through a garbage
// pointer; with a null value the misuse fails deterministically instead.
ARMAssembler::ARMAssembler(const sp<Assembly>& assembly)
    :   ARMAssemblerInterface(),
        mAssembly(assembly),
        mPrologPC(0)
{
    mBase = mPC = (uint32_t *)assembly->base();
    mDuration = ggl_system_time();
#if defined(WITH_LIB_HARDWARE)
    mQemuTracing = true;
#endif
}
// Nothing to release by hand: the members clean up through their own
// destructors.
ARMAssembler::~ARMAssembler() { }
// Returns the current write position, i.e. the address where the next
// instruction word will be stored.
uint32_t* ARMAssembler::pc() const
{
    uint32_t* const cursor = mPC;
    return cursor;
}
// Returns the address of the first instruction word of the code being built.
uint32_t* ARMAssembler::base() const
{
    uint32_t* const start = mBase;
    return start;
}
// Discards everything assembled so far: all label, branch and comment
// bookkeeping is cleared and the write cursor is rewound to the start of the
// assembly's buffer. mPrologPC and mDuration are left untouched.
void ARMAssembler::reset()
{
    // drop the bookkeeping attached to the previous code
    mComments.clear();
    mLabelsInverseMapping.clear();
    mLabels.clear();
    mBranchTargets.clear();

    // rewind to the beginning of the buffer
    uint32_t* const start = (uint32_t *)mAssembly->base();
    mBase = start;
    mPC = start;
}
// ----------------------------------------------------------------------------
void ARMAssembler::disassemble(const char* name)
{
if (name) {
printf("%s:\n", name);
}
size_t count = pc()-base();
uint32_t* i = base();
while (count--) {
ssize_t label = mLabelsInverseMapping.indexOfKey(i);
if (label >= 0) {
printf("%s:\n", mLabelsInverseMapping.valueAt(label));
}
ssize_t comment = mComments.indexOfKey(i);
if (comment >= 0) {
printf("; %s\n", mComments.valueAt(comment));
}
printf("%08x: %08x ", int(i), int(i[0]));
::disassemble((u_int)i);
i++;
}
}
// Attaches `string` to the current write position so that disassemble() can
// print it next to the instruction emitted there. Only the pointer is
// recorded, not a copy of the text.
void ARMAssembler::comment(const char* string)
{
    uint32_t* const where = mPC;
    mComments.add(where, string);
}
// Defines `theLabel` at the current write position. The label is recorded in
// both directions: name -> address (used to resolve branches) and
// address -> name (used by disassemble()). Only the pointer is stored.
void ARMAssembler::label(const char* theLabel)
{
    uint32_t* const where = mPC;
    mLabels.add(theLabel, where);
    mLabelsInverseMapping.add(where, theLabel);
}
// Emits a branch (B) to a named label. The 24-bit offset field is left at
// zero and the instruction address is remembered so that generate() can
// patch the offset in once the label address is known.
void ARMAssembler::B(int cc, const char* label)
{
    uint32_t* const site = mPC;
    mBranchTargets.add(branch_target_t(label, site));
    *site = (cc<<28) | (0xA<<24) | 0;
    mPC = site + 1;
}
// Emits a branch-with-link (BL) to a named label. The 24-bit offset field is
// left at zero and the instruction address is remembered so that generate()
// can patch the offset in once the label address is known.
void ARMAssembler::BL(int cc, const char* label)
{
    uint32_t* const site = mPC;
    mBranchTargets.add(branch_target_t(label, site));
    *site = (cc<<28) | (0xB<<24) | 0;
    mPC = site + 1;
}
#if 0
#pragma mark -
#pragma mark Prolog/Epilog & Generate...
#endif
// Reserves the function prolog. The position is remembered in mPrologPC and
// a placeholder STM saving every LSAVED register is emitted; epilog() later
// overwrites this one-word slot with the final instruction.
void ARMAssembler::prolog()
{
    uint32_t* const slot = mPC;
    mPrologPC = slot;
    // write dummy prolog code
    STM(AL, FD, SP, 1, LSAVED);
}
// Finalizes prolog and epilog according to which registers were used.
//
// `touched` is a register-list bit mask; only the registers that are part of
// LSAVED are considered. If any remain, the slot reserved by prolog() is
// rewritten to push them (plus LR) and the epilog pops them again before
// returning with BX LR. Otherwise the prolog slot becomes a NOP
// (MOV R0, R0) and the epilog is just BX LR.
void ARMAssembler::epilog(uint32_t touched)
{
    const uint32_t saved = touched & LSAVED;
    const bool needsSave = (saved != 0);

    // go back and write the real prolog into the reserved slot
    uint32_t* const resume = mPC;
    mPC = mPrologPC;
    if (needsSave) {
        STM(AL, FD, SP, 1, saved | LLR);
    } else {
        // heh, no registers to save!
        MOV(AL, 0, R0, R0); // NOP
    }
    mPC = resume;

    // write epilog code
    if (needsSave) {
        LDM(AL, FD, SP, 1, saved | LLR);
    }
    BX(AL, LR);
}
int ARMAssembler::generate(const char* name)
{
// fixup all the branches
size_t count = mBranchTargets.size();
while (count--) {
const branch_target_t& bt = mBranchTargets[count];
uint32_t* target_pc = mLabels.valueFor(bt.label);
LOG_ALWAYS_FATAL_IF(!target_pc,
"error resolving branch targets, target_pc is null");
int32_t offset = int32_t(target_pc - (bt.pc+2));
*bt.pc |= offset & 0xFFFFFF;
}
mAssembly->resize( int(pc()-base())*4 );
// the instruction cache is flushed by CodeCache
const int64_t duration = ggl_system_time() - mDuration;
const char * const format = "generated %s (%d ins) at [%p:%p] in %lld ns\n";
LOGI(format, name, int(pc()-base()), base(), pc(), duration);
#if defined(WITH_LIB_HARDWARE)
if (__builtin_expect(mQemuTracing, 0)) {
int err = qemu_add_mapping(int(base()), name);
mQemuTracing = (err >= 0);
}
#endif
char value[PROPERTY_VALUE_MAX];
property_get("debug.pf.disasm", value, "0");
if (atoi(value) != 0) {
printf(format, name, int(pc()-base()), base(), pc(), duration);
disassemble(name);
}
return NO_ERROR;
}
// Looks up the address that was recorded for `label` by label().
uint32_t* ARMAssembler::pcForLabel(const char* label)
{
    uint32_t* const address = mLabels.valueFor(label);
    return address;
}
// ----------------------------------------------------------------------------
#if 0
#pragma mark -
#pragma mark Data Processing...
#endif
// Emits one data-processing instruction word at the current position and
// advances past it. Field placement: cc at bit 28, opcode at bit 21, s at
// bit 20, Rn at bit 16, Rd at bit 12; Op2 is OR-ed in unchanged, so it must
// already be an encoded operand.
void ARMAssembler::dataProcessing(int opcode, int cc,
        int s, int Rd, int Rn, uint32_t Op2)
{
    uint32_t insn = Op2;
    insn |= (Rd<<12);
    insn |= (Rn<<16);
    insn |= (s<<20);
    insn |= (opcode<<21);
    insn |= (cc<<28);
    *mPC = insn;
    ++mPC;
}
#if 0
#pragma mark -
#pragma mark Multiply...
#endif
// multiply...
// Emits MLA (multiply-accumulate). When Rd and Rm name the same register
// the two multiplicands are exchanged first; if they still coincide the
// combination is reported through LOG_FATAL_IF.
void ARMAssembler::MLA(int cc, int s,
        int Rd, int Rm, int Rs, int Rn) {
    if (Rm == Rd) {
        const int previousRm = Rm;
        Rm = Rs;
        Rs = previousRm;
    }
    LOG_FATAL_IF(Rd==Rm, "MLA(r%u,r%u,r%u,r%u)", Rd,Rm,Rs,Rn);
    const uint32_t opBits = (1<<21) | (s<<20) | 0x90;
    const uint32_t regBits = (Rd<<16) | (Rn<<12) | (Rs<<8) | Rm;
    *mPC++ = (cc<<28) | opBits | regBits;
}
// Emits MUL. When Rd and Rm name the same register the two multiplicands
// are exchanged first; if they still coincide the combination is reported
// through LOG_FATAL_IF.
void ARMAssembler::MUL(int cc, int s,
        int Rd, int Rm, int Rs) {
    if (Rm == Rd) {
        const int previousRm = Rm;
        Rm = Rs;
        Rs = previousRm;
    }
    LOG_FATAL_IF(Rd==Rm, "MUL(r%u,r%u,r%u)", Rd,Rm,Rs);
    const uint32_t opBits = (s<<20) | 0x90;
    const uint32_t regBits = (Rd<<16) | (Rs<<8) | Rm;
    *mPC++ = (cc<<28) | opBits | regBits;
}
// Emits UMULL (unsigned long multiply, bit 23 set). RdLo, RdHi and Rm must
// all be different registers; violations are reported through LOG_FATAL_IF.
void ARMAssembler::UMULL(int cc, int s,
        int RdLo, int RdHi, int Rm, int Rs) {
    LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi,
                        "UMULL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs);
    const uint32_t opBits = (1<<23) | (s<<20) | 0x90;
    const uint32_t regBits = (RdHi<<16) | (RdLo<<12) | (Rs<<8) | Rm;
    *mPC++ = (cc<<28) | opBits | regBits;
}
// Emits the unsigned long multiply-accumulate form (bits 23 and 21 set).
// RdLo, RdHi and Rm must all be different registers; violations are
// reported through LOG_FATAL_IF.
void ARMAssembler::UMUAL(int cc, int s,
        int RdLo, int RdHi, int Rm, int Rs) {
    LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi,
                        "UMUAL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs);
    const uint32_t opBits = (1<<23) | (1<<21) | (s<<20) | 0x90;
    const uint32_t regBits = (RdHi<<16) | (RdLo<<12) | (Rs<<8) | Rm;
    *mPC++ = (cc<<28) | opBits | regBits;
}
// Emits SMULL (signed long multiply, bits 23 and 22 set). RdLo, RdHi and Rm
// must all be different registers; violations are reported through
// LOG_FATAL_IF.
void ARMAssembler::SMULL(int cc, int s,
        int RdLo, int RdHi, int Rm, int Rs) {
    LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi,
                        "SMULL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs);
    const uint32_t opBits = (1<<23) | (1<<22) | (s<<20) | 0x90;
    const uint32_t regBits = (RdHi<<16) | (RdLo<<12) | (Rs<<8) | Rm;
    *mPC++ = (cc<<28) | opBits | regBits;
}
// Emits the signed long multiply-accumulate form (bits 23, 22 and 21 set).
// RdLo, RdHi and Rm must all be different registers; violations are
// reported through LOG_FATAL_IF.
void ARMAssembler::SMUAL(int cc, int s,
        int RdLo, int RdHi, int Rm, int Rs) {
    LOG_FATAL_IF(RdLo==Rm || RdHi==Rm || RdLo==RdHi,
                        "SMUAL(r%u,r%u,r%u,r%u)", RdLo,RdHi,Rm,Rs);
    const uint32_t opBits = (1<<23) | (1<<22) | (1<<21) | (s<<20) | 0x90;
    const uint32_t regBits = (RdHi<<16) | (RdLo<<12) | (Rs<<8) | Rm;
    *mPC++ = (cc<<28) | opBits | regBits;
}
#if 0
#pragma mark -
#pragma mark Branches...
#endif
// branches...
// Emits a branch (B) to the absolute address `pc`. The encoded offset is
// the distance in words from the branch address + 2 words, truncated to
// 24 bits; no range check is performed.
void ARMAssembler::B(int cc, uint32_t* pc)
{
    const int32_t offset = int32_t(pc - (mPC+2));
    const uint32_t insn = (cc<<28) | (0xA<<24) | (offset & 0xFFFFFF);
    *mPC = insn;
    ++mPC;
}
// Emits a branch-with-link (BL) to the absolute address `pc`. The encoded
// offset is the distance in words from the branch address + 2 words,
// truncated to 24 bits; no range check is performed.
void ARMAssembler::BL(int cc, uint32_t* pc)
{
    const int32_t offset = int32_t(pc - (mPC+2));
    const uint32_t insn = (cc<<28) | (0xB<<24) | (offset & 0xFFFFFF);
    *mPC = insn;
    ++mPC;
}
// Emits BX Rn (branch to the address held in register Rn).
void ARMAssembler::BX(int cc, int Rn)
{
    const uint32_t insn = (cc<<28) | 0x12FFF10 | Rn;
    *mPC = insn;
    ++mPC;
}
#if 0
#pragma mark -
#pragma mark Data Transfer...
#endif
// data transfer...
// Emits LDR (bits 26 and 20 set). `offset` carries the already-encoded
// addressing-mode bits and is OR-ed in unchanged.
void ARMAssembler::LDR(int cc, int Rd, int Rn, uint32_t offset) {
    uint32_t insn = offset;
    insn |= (Rn<<16) | (Rd<<12);
    insn |= (1<<26) | (1<<20);
    insn |= (cc<<28);
    *mPC++ = insn;
}
// Emits LDRB (bits 26, 22 and 20 set). `offset` carries the already-encoded
// addressing-mode bits and is OR-ed in unchanged.
void ARMAssembler::LDRB(int cc, int Rd, int Rn, uint32_t offset) {
    uint32_t insn = offset;
    insn |= (Rn<<16) | (Rd<<12);
    insn |= (1<<26) | (1<<22) | (1<<20);
    insn |= (cc<<28);
    *mPC++ = insn;
}
// Emits STR (bit 26 set, bit 20 clear). `offset` carries the already-encoded
// addressing-mode bits and is OR-ed in unchanged.
void ARMAssembler::STR(int cc, int Rd, int Rn, uint32_t offset) {
    uint32_t insn = offset;
    insn |= (Rn<<16) | (Rd<<12);
    insn |= (1<<26);
    insn |= (cc<<28);
    *mPC++ = insn;
}
// Emits STRB (bits 26 and 22 set, bit 20 clear). `offset` carries the
// already-encoded addressing-mode bits and is OR-ed in unchanged.
void ARMAssembler::STRB(int cc, int Rd, int Rn, uint32_t offset) {
    uint32_t insn = offset;
    insn |= (Rn<<16) | (Rd<<12);
    insn |= (1<<26) | (1<<22);
    insn |= (cc<<28);
    *mPC++ = insn;
}
// Emits LDRH (bit 20 set, low-byte pattern 0xB0). `offset` carries the
// already-encoded addressing-mode bits and is OR-ed in unchanged.
void ARMAssembler::LDRH(int cc, int Rd, int Rn, uint32_t offset) {
    uint32_t insn = offset;
    insn |= (Rn<<16) | (Rd<<12);
    insn |= (1<<20) | 0xB0;
    insn |= (cc<<28);
    *mPC++ = insn;
}
// Emits LDRSB (bit 20 set, low-byte pattern 0xD0). `offset` carries the
// already-encoded addressing-mode bits and is OR-ed in unchanged.
void ARMAssembler::LDRSB(int cc, int Rd, int Rn, uint32_t offset) {
    uint32_t insn = offset;
    insn |= (Rn<<16) | (Rd<<12);
    insn |= (1<<20) | 0xD0;
    insn |= (cc<<28);
    *mPC++ = insn;
}
// Emits LDRSH (bit 20 set, low-byte pattern 0xF0). `offset` carries the
// already-encoded addressing-mode bits and is OR-ed in unchanged.
void ARMAssembler::LDRSH(int cc, int Rd, int Rn, uint32_t offset) {
    uint32_t insn = offset;
    insn |= (Rn<<16) | (Rd<<12);
    insn |= (1<<20) | 0xF0;
    insn |= (cc<<28);
    *mPC++ = insn;
}
// Emits STRH (bit 20 clear, low-byte pattern 0xB0). `offset` carries the
// already-encoded addressing-mode bits and is OR-ed in unchanged.
void ARMAssembler::STRH(int cc, int Rd, int Rn, uint32_t offset) {
    uint32_t insn = offset;
    insn |= (Rn<<16) | (Rd<<12);
    insn |= 0xB0;
    insn |= (cc<<28);
    *mPC++ = insn;
}
#if 0
#pragma mark -
#pragma mark Block Data Transfer...
#endif
// block data transfer...
// Emits LDM (load multiple, bit 20 set). `dir` is one of the
// ED/FD/EA/FA/IB/IA/DB/DA enumerators and selects the P (bit 24) and
// U (bit 23) bits through the tables below; it is used as an index without
// a range check. `W` goes to bit 21, `reg_list` is OR-ed in unchanged.
void ARMAssembler::LDM(int cc, int dir,
        int Rn, int W, uint32_t reg_list)
{
    // indexed by dir:              ED FD EA FA      IB IA DB DA
    const uint8_t preBit[8] = {     1, 0, 1, 0,      1, 0, 1, 0 };
    const uint8_t upBit[8]  = {     1, 1, 0, 0,      1, 1, 0, 0 };
    const uint32_t modeBits =
            (uint32_t(preBit[dir])<<24) | (uint32_t(upBit[dir])<<23);
    *mPC++ = (cc<<28) | (4<<25) | modeBits |
            (1<<20) | (W<<21) | (Rn<<16) | reg_list;
}
// Emits STM (store multiple, bit 20 clear). `dir` is one of the
// ED/FD/EA/FA/IB/IA/DB/DA enumerators and selects the P (bit 24) and
// U (bit 23) bits through the tables below; it is used as an index without
// a range check. `W` goes to bit 21, `reg_list` is OR-ed in unchanged.
void ARMAssembler::STM(int cc, int dir,
        int Rn, int W, uint32_t reg_list)
{
    // indexed by dir:              ED FD EA FA      IB IA DB DA
    // (for stores the stack modes map to  DA DB IA IB)
    const uint8_t preBit[8] = {     0, 1, 0, 1,      1, 0, 1, 0 };
    const uint8_t upBit[8]  = {     0, 0, 1, 1,      1, 1, 0, 0 };
    const uint32_t modeBits =
            (uint32_t(preBit[dir])<<24) | (uint32_t(upBit[dir])<<23);
    *mPC++ = (cc<<28) | (4<<25) | modeBits |
            (0<<20) | (W<<21) | (Rn<<16) | reg_list;
}
#if 0
#pragma mark -
#pragma mark Special...
#endif
// special...
// Emits SWP: Rn at bit 16, Rd at bit 12, Rm in the low bits.
void ARMAssembler::SWP(int cc, int Rn, int Rd, int Rm) {
    const uint32_t opBits = (2<<23) | 0x90;
    const uint32_t regBits = (Rn<<16) | (Rd << 12) | Rm;
    *mPC++ = (cc<<28) | opBits | regBits;
}
// Emits SWPB: same layout as SWP with bit 22 additionally set.
void ARMAssembler::SWPB(int cc, int Rn, int Rd, int Rm) {
    const uint32_t opBits = (2<<23) | (1<<22) | 0x90;
    const uint32_t regBits = (Rn<<16) | (Rd << 12) | Rm;
    *mPC++ = (cc<<28) | opBits | regBits;
}
// Emits SWI. `comment` is OR-ed into the instruction unchanged (it is not
// masked, so callers must keep it within the low 24 bits).
void ARMAssembler::SWI(int cc, uint32_t comment) {
    uint32_t insn = comment;
    insn |= (0xF<<24);
    insn |= (cc<<28);
    *mPC++ = insn;
}
#if 0
#pragma mark -
#pragma mark DSP instructions...
#endif
// DSP instructions...
// Emits PLD (preload hint). Only pre-indexed addressing without write-back
// is accepted: `offset` must have bit 24 (P) set and bit 21 (W) clear,
// anything else is fatal.
void ARMAssembler::PLD(int Rn, uint32_t offset) {
    const bool preIndexed = (offset & (1<<24)) != 0;
    const bool writeBack  = (offset & (1<<21)) != 0;
    LOG_ALWAYS_FATAL_IF(!preIndexed || writeBack,
                        "PLD only P=1, W=0");
    uint32_t insn = offset;
    insn |= (Rn<<16);
    insn |= 0xF550F000;
    *mPC++ = insn;
}
// Emits CLZ Rd, Rm.
void ARMAssembler::CLZ(int cc, int Rd, int Rm)
{
    const uint32_t regBits = (Rd<<12) | Rm;
    *mPC++ = (cc<<28) | 0x16F0F10 | regBits;
}
// Emits QADD: Rn at bit 16, Rd at bit 12, Rm in the low bits.
void ARMAssembler::QADD(int cc, int Rd, int Rm, int Rn)
{
    const uint32_t regBits = (Rn<<16) | (Rd<<12) | Rm;
    *mPC++ = (cc<<28) | 0x1000050 | regBits;
}
// Emits QDADD: Rn at bit 16, Rd at bit 12, Rm in the low bits.
void ARMAssembler::QDADD(int cc, int Rd, int Rm, int Rn)
{
    const uint32_t regBits = (Rn<<16) | (Rd<<12) | Rm;
    *mPC++ = (cc<<28) | 0x1400050 | regBits;
}
// Emits QSUB: Rn at bit 16, Rd at bit 12, Rm in the low bits.
void ARMAssembler::QSUB(int cc, int Rd, int Rm, int Rn)
{
    const uint32_t regBits = (Rn<<16) | (Rd<<12) | Rm;
    *mPC++ = (cc<<28) | 0x1200050 | regBits;
}
// Emits QDSUB: Rn at bit 16, Rd at bit 12, Rm in the low bits.
void ARMAssembler::QDSUB(int cc, int Rd, int Rm, int Rn)
{
    const uint32_t regBits = (Rn<<16) | (Rd<<12) | Rm;
    *mPC++ = (cc<<28) | 0x1600050 | regBits;
}
// Emits SMUL<x><y>. `xy` is one of the xyBB/xyTB/xyBT/xyTT selectors and is
// placed at bit 4; Rd at bit 16, Rs at bit 8, Rm in the low bits.
void ARMAssembler::SMUL(int cc, int xy,
                int Rd, int Rm, int Rs)
{
    const uint32_t regBits = (Rd<<16) | (Rs<<8) | Rm;
    const uint32_t halfSelect = (xy<<4);
    *mPC++ = (cc<<28) | 0x1600080 | regBits | halfSelect;
}
// Emits SMULW<y>. `y` is one of the yB/yT selectors and is placed at bit 4;
// Rd at bit 16, Rs at bit 8, Rm in the low bits.
void ARMAssembler::SMULW(int cc, int y,
                int Rd, int Rm, int Rs)
{
    const uint32_t regBits = (Rd<<16) | (Rs<<8) | Rm;
    const uint32_t halfSelect = (y<<4);
    *mPC++ = (cc<<28) | 0x12000A0 | regBits | halfSelect;
}
// Emits SMLA<x><y>. `xy` is one of the xyBB/xyTB/xyBT/xyTT selectors and is
// placed at bit 4; Rd at bit 16, Rn at bit 12, Rs at bit 8, Rm in the low
// bits.
void ARMAssembler::SMLA(int cc, int xy,
                int Rd, int Rm, int Rs, int Rn)
{
    const uint32_t regBits = (Rd<<16) | (Rn<<12) | (Rs<<8) | Rm;
    const uint32_t halfSelect = (xy<<4);
    *mPC++ = (cc<<28) | 0x1000080 | regBits | halfSelect;
}
// Emits SMLAL<x><y>. `xy` is one of the xyBB/xyTB/xyBT/xyTT selectors and
// is placed at bit 4; RdHi at bit 16, RdLo at bit 12, Rs at bit 8, Rm in
// the low bits.
void ARMAssembler::SMLAL(int cc, int xy,
                int RdHi, int RdLo, int Rs, int Rm)
{
    const uint32_t regBits = (RdHi<<16) | (RdLo<<12) | (Rs<<8) | Rm;
    const uint32_t halfSelect = (xy<<4);
    *mPC++ = (cc<<28) | 0x1400080 | regBits | halfSelect;
}
// Emits SMLAW<y>. `y` is one of the yB/yT selectors and is placed at bit 4;
// Rd at bit 16, Rn at bit 12, Rs at bit 8, Rm in the low bits.
void ARMAssembler::SMLAW(int cc, int y,
                int Rd, int Rm, int Rs, int Rn)
{
    const uint32_t regBits = (Rd<<16) | (Rn<<12) | (Rs<<8) | Rm;
    const uint32_t halfSelect = (y<<4);
    *mPC++ = (cc<<28) | 0x1200080 | regBits | halfSelect;
}
#if 0
#pragma mark -
#pragma mark Byte/half word extract and extend (ARMv6+ only)...
#endif
// Emits UXTB16 Rd, Rm. `rotate` is given in bits; it is divided by 8
// (rotate >> 3) and the result is placed at bit 10.
void ARMAssembler::UXTB16(int cc, int Rd, int Rm, int rotate)
{
    const uint32_t rotateField = ((rotate >> 3) << 10);
    const uint32_t regBits = (Rd<<12) | Rm;
    *mPC++ = (cc<<28) | 0x6CF0070 | regBits | rotateField;
}
}; // namespace android
@@ -0,0 +1,157 @@
/* libs/pixelflinger/codeflinger/ARMAssembler.h
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
#ifndef ANDROID_ARMASSEMBLER_H
#define ANDROID_ARMASSEMBLER_H
#include <stdint.h>
#include <sys/types.h>
#include "tinyutils/Vector.h"
#include "tinyutils/KeyedVector.h"
#include "tinyutils/smartpointer.h"
#include "tinyutils/smartpointer.h"
#include "codeflinger/ARMAssemblerInterface.h"
#include "codeflinger/CodeCache.h"
namespace android {
// ----------------------------------------------------------------------------
// Implementation of ARMAssemblerInterface that writes 32-bit instruction
// words into the buffer of an Assembly. Each emitter stores one word at the
// current position and advances it.
class ARMAssembler : public ARMAssemblerInterface
{
public:
// Targets the buffer returned by assembly->base().
ARMAssembler(const sp<Assembly>& assembly);
virtual ~ARMAssembler();
// First instruction word of the code being generated.
uint32_t* base() const;
// Position at which the next instruction word will be written.
uint32_t* pc() const;
// Prints a listing of the code generated so far, preceded by `name`
// when it is non-null.
void disassemble(const char* name);
// ------------------------------------------------------------------------
// ARMAssemblerInterface...
// ------------------------------------------------------------------------
// Rewinds to the start of the buffer and forgets labels, branch fix-ups
// and comments.
virtual void reset();
// Resolves label branches, resizes the assembly and logs statistics.
virtual int generate(const char* name);
// prolog() reserves one instruction slot; epilog() fills it in once the
// set of touched registers is known and emits the function return.
virtual void prolog();
virtual void epilog(uint32_t touched);
// Associates `string` with the current position (shown by disassemble()).
virtual void comment(const char* string);
virtual void dataProcessing(int opcode, int cc, int s,
int Rd, int Rn,
uint32_t Op2);
virtual void MLA(int cc, int s,
int Rd, int Rm, int Rs, int Rn);
virtual void MUL(int cc, int s,
int Rd, int Rm, int Rs);
virtual void UMULL(int cc, int s,
int RdLo, int RdHi, int Rm, int Rs);
virtual void UMUAL(int cc, int s,
int RdLo, int RdHi, int Rm, int Rs);
virtual void SMULL(int cc, int s,
int RdLo, int RdHi, int Rm, int Rs);
virtual void SMUAL(int cc, int s,
int RdLo, int RdHi, int Rm, int Rs);
// Branches to an absolute address: the offset is encoded immediately.
virtual void B(int cc, uint32_t* pc);
virtual void BL(int cc, uint32_t* pc);
virtual void BX(int cc, int Rn);
// Defines a label at the current position.
virtual void label(const char* theLabel);
// Branches to a label: the offset is patched in by generate().
virtual void B(int cc, const char* label);
virtual void BL(int cc, const char* label);
// Address recorded for `label`.
virtual uint32_t* pcForLabel(const char* label);
virtual void LDR (int cc, int Rd,
int Rn, uint32_t offset = immed12_pre(0));
virtual void LDRB(int cc, int Rd,
int Rn, uint32_t offset = immed12_pre(0));
virtual void STR (int cc, int Rd,
int Rn, uint32_t offset = immed12_pre(0));
virtual void STRB(int cc, int Rd,
int Rn, uint32_t offset = immed12_pre(0));
virtual void LDRH (int cc, int Rd,
int Rn, uint32_t offset = immed8_pre(0));
virtual void LDRSB(int cc, int Rd,
int Rn, uint32_t offset = immed8_pre(0));
virtual void LDRSH(int cc, int Rd,
int Rn, uint32_t offset = immed8_pre(0));
virtual void STRH (int cc, int Rd,
int Rn, uint32_t offset = immed8_pre(0));
virtual void LDM(int cc, int dir,
int Rn, int W, uint32_t reg_list);
virtual void STM(int cc, int dir,
int Rn, int W, uint32_t reg_list);
virtual void SWP(int cc, int Rn, int Rd, int Rm);
virtual void SWPB(int cc, int Rn, int Rd, int Rm);
virtual void SWI(int cc, uint32_t comment);
virtual void PLD(int Rn, uint32_t offset);
virtual void CLZ(int cc, int Rd, int Rm);
virtual void QADD(int cc, int Rd, int Rm, int Rn);
virtual void QDADD(int cc, int Rd, int Rm, int Rn);
virtual void QSUB(int cc, int Rd, int Rm, int Rn);
virtual void QDSUB(int cc, int Rd, int Rm, int Rn);
virtual void SMUL(int cc, int xy,
int Rd, int Rm, int Rs);
virtual void SMULW(int cc, int y,
int Rd, int Rm, int Rs);
virtual void SMLA(int cc, int xy,
int Rd, int Rm, int Rs, int Rn);
virtual void SMLAL(int cc, int xy,
int RdHi, int RdLo, int Rs, int Rm);
virtual void SMLAW(int cc, int y,
int Rd, int Rm, int Rs, int Rn);
virtual void UXTB16(int cc, int Rd, int Rm, int rotate);
private:
// Copy construction and assignment are declared private; no definition
// is visible in this header.
ARMAssembler(const ARMAssembler& rhs);
ARMAssembler& operator = (const ARMAssembler& rhs);
// Assembly whose buffer receives the generated code.
sp<Assembly> mAssembly;
// Start of the code buffer.
uint32_t* mBase;
// Current write position.
uint32_t* mPC;
// Position of the slot reserved by prolog().
uint32_t* mPrologPC;
// Timestamp taken at construction, used by generate() to log the
// elapsed time.
int64_t mDuration;
#if defined(WITH_LIB_HARDWARE)
// Cleared by generate() after the first failed qemu_add_mapping() call.
bool mQemuTracing;
#endif
// A branch instruction at `pc` whose offset must be resolved against
// `label` by generate().
struct branch_target_t {
inline branch_target_t() : label(0), pc(0) { }
inline branch_target_t(const char* l, uint32_t* p)
: label(l), pc(p) { }
const char* label;
uint32_t* pc;
};
// Pending label branches.
Vector<branch_target_t> mBranchTargets;
// label name -> address; keyed by the `const char*` pointer value.
KeyedVector< const char*, uint32_t* > mLabels;
// address -> label name (for disassembly).
KeyedVector< uint32_t*, const char* > mLabelsInverseMapping;
// address -> comment text (for disassembly).
KeyedVector< uint32_t*, const char* > mComments;
};
}; // namespace android
#endif //ANDROID_ARMASSEMBLER_H
@@ -0,0 +1,173 @@
/* libs/pixelflinger/codeflinger/ARMAssemblerInterface.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
#include <errno.h>
#include <stdlib.h>
#include <stdint.h>
#include <sys/types.h>
#include <cutils/log.h>
#include "codeflinger/ARMAssemblerInterface.h"
namespace android {
// ----------------------------------------------------------------------------
// Out-of-line (empty) definition of the interface's virtual destructor.
ARMAssemblerInterface::~ARMAssemblerInterface() { }
// Tries to express `immediate` as an 8-bit value rotated right by an even
// amount, the form used by data-processing immediates.
//
// On return `imm` holds the candidate 8-bit value and `rot` the 4-bit
// rotation field (the actual rotation is rot*2 bits to the right).
//
// Returns 0 on success, -EINVAL when no 8-bit value was found, and -1 when
// the found encoding does not decode back to `immediate`.
int ARMAssemblerInterface::buildImmediate(
        uint32_t immediate, uint32_t& rot, uint32_t& imm)
{
    rot = 0;
    imm = immediate;
    if (imm > 0x7F) { // skip the easy cases
        // rotate right two bits at a time until the value sits in the low
        // bits with a non-zero bottom pair, or until we have gone all the
        // way around
        while (!(imm&3)  || (imm&0xFC000000)) {
            uint32_t newval;
            newval = imm >> 2;
            newval |= (imm&3) << 30;
            imm = newval;
            rot += 2;
            if (rot == 32) {
                rot = 0;
                break;
            }
        }
    }
    // `rot` counted the right-rotation applied to `immediate`; the encoded
    // field is the right-rotation that undoes it, in units of two bits
    rot = (16 - (rot>>1)) & 0xF;

    if (imm>=0x100)
        return -EINVAL;

    // Verify the encoding. A rotation of zero is handled separately:
    // the general formula would evaluate imm << 32, which is undefined.
    const uint32_t amount = rot<<1;
    const uint32_t decoded = amount
            ? ((imm>>amount) | (imm<<(32-amount)))
            : imm;
    if (decoded != immediate)
        return -1;

    return 0;
}
// shifters...
// Returns true when buildImmediate() can encode `immediate`.
bool ARMAssemblerInterface::isValidImmediate(uint32_t immediate)
{
    uint32_t rotation, value;
    const int status = buildImmediate(immediate, rotation, value);
    return status == 0;
}
// Builds the immediate form of the shifter operand: bit 25 set, rotation
// field at bit 8, 8-bit value in the low bits. An immediate that
// buildImmediate() rejects is fatal.
uint32_t ARMAssemblerInterface::imm(uint32_t immediate)
{
    uint32_t rotation, value;
    const int status = buildImmediate(immediate, rotation, value);

    LOG_ALWAYS_FATAL_IF(status==-EINVAL,
                        "immediate %08x cannot be encoded",
                        immediate);

    LOG_ALWAYS_FATAL_IF(status,
                        "immediate (%08x) encoding bogus!",
                        immediate);

    uint32_t operand = value;
    operand |= (rotation<<8);
    operand |= (1<<25);
    return operand;
}
// Register operand shifted by a constant: 5-bit shift amount at bit 7,
// 2-bit shift type at bit 5, register number in the low 4 bits.
uint32_t ARMAssemblerInterface::reg_imm(int Rm, int type, uint32_t shift)
{
    const uint32_t amountField = (shift&0x1F)<<7;
    const uint32_t typeField = (type&0x3)<<5;
    const uint32_t regField = (Rm&0xF);
    return amountField | typeField | regField;
}
// Register operand with the ROR shift type and a zero shift amount field.
uint32_t ARMAssemblerInterface::reg_rrx(int Rm)
{
    const uint32_t typeField = (ROR<<5);
    const uint32_t regField = (Rm&0xF);
    return typeField | regField;
}
// Register operand shifted by a register: Rs at bit 8, 2-bit shift type at
// bit 5, bit 4 set, Rm in the low 4 bits.
uint32_t ARMAssemblerInterface::reg_reg(int Rm, int type, int Rs)
{
    const uint32_t shiftRegField = (Rs&0xF)<<8;
    const uint32_t typeField = (type&0x3)<<5;
    const uint32_t regField = (Rm&0xF);
    return shiftRegField | typeField | (1<<4) | regField;
}
// addressing modes...
// LDR(B)/STR(B)/PLD (immediate and Rm can be negative, which indicates U=0)
// Pre-indexed immediate offset for LDR(B)/STR(B)/PLD: bit 24 (P) set,
// bit 23 (U) set unless `immed12` is negative, W at bit 21, magnitude in the
// low bits. Magnitudes of 0x800 and above are rejected as fatal.
uint32_t ARMAssemblerInterface::immed12_pre(int32_t immed12, int W)
{
    LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800,
                        "LDR(B)/STR(B)/PLD immediate too big (%08x)",
                        immed12);
    const uint32_t upBit = (uint32_t(immed12)>>31)^1;   // 1 when >= 0
    const uint32_t magnitude = (abs(immed12)&0x7FF);
    return (1<<24) | (upBit<<23) | ((W&1)<<21) | magnitude;
}
// Post-indexed immediate offset for LDR(B)/STR(B)/PLD: bit 24 (P) clear,
// bit 23 (U) set unless `immed12` is negative, magnitude in the low bits.
// Magnitudes of 0x800 and above are rejected as fatal.
uint32_t ARMAssemblerInterface::immed12_post(int32_t immed12)
{
    LOG_ALWAYS_FATAL_IF(abs(immed12) >= 0x800,
                        "LDR(B)/STR(B)/PLD immediate too big (%08x)",
                        immed12);
    const uint32_t upBit = (uint32_t(immed12)>>31)^1;   // 1 when >= 0
    const uint32_t magnitude = (abs(immed12)&0x7FF);
    return (upBit<<23) | magnitude;
}
// Pre-indexed scaled-register offset: bits 25 and 24 set, bit 23 (U) set
// unless `Rm` is negative, W at bit 21, plus reg_imm() of |Rm|.
uint32_t ARMAssemblerInterface::reg_scale_pre(int Rm, int type,
        uint32_t shift, int W)
{
    const uint32_t upBit = (uint32_t(Rm)>>31)^1;    // 1 when Rm >= 0
    const uint32_t operand = reg_imm(abs(Rm), type, shift);
    return (1<<25) | (1<<24) | (upBit<<23) | ((W&1)<<21) | operand;
}
// Post-indexed scaled-register offset: bit 25 set, bit 23 (U) set unless
// `Rm` is negative, plus reg_imm() of |Rm|.
uint32_t ARMAssemblerInterface::reg_scale_post(int Rm, int type, uint32_t shift)
{
    const uint32_t upBit = (uint32_t(Rm)>>31)^1;    // 1 when Rm >= 0
    const uint32_t operand = reg_imm(abs(Rm), type, shift);
    return (1<<25) | (upBit<<23) | operand;
}
// LDRH/LDRSB/LDRSH/STRH (immediate and Rm can be negative, which indicates U=0)
// Pre-indexed immediate offset for LDRH/LDRSB/LDRSH/STRH: bits 24 and 22
// set, bit 23 (U) set unless `immed8` is negative, W at bit 21. The 8-bit
// magnitude is split: its high nibble goes to bits 8-11, its low nibble to
// bits 0-3. Magnitudes of 0x100 and above are rejected as fatal.
uint32_t ARMAssemblerInterface::immed8_pre(int32_t immed8, int W)
{
    const uint32_t magnitude = abs(immed8);

    LOG_ALWAYS_FATAL_IF(abs(immed8) >= 0x100,
                        "LDRH/LDRSB/LDRSH/STRH immediate too big (%08x)",
                        immed8);

    const uint32_t upBit = (uint32_t(immed8)>>31)^1;    // 1 when >= 0
    const uint32_t splitImm = ((magnitude&0xF0)<<4) | (magnitude&0xF);
    return (1<<24) | (1<<22) | (upBit<<23) | ((W&1)<<21) | splitImm;
}
// Post-indexed immediate offset for LDRH/LDRSB/LDRSH/STRH: bit 22 set,
// bit 23 (U) set unless `immed8` is negative. The 8-bit magnitude is split:
// its high nibble goes to bits 8-11, its low nibble to bits 0-3.
// Magnitudes of 0x100 and above are rejected as fatal.
uint32_t ARMAssemblerInterface::immed8_post(int32_t immed8)
{
    const uint32_t magnitude = abs(immed8);

    LOG_ALWAYS_FATAL_IF(abs(immed8) >= 0x100,
                        "LDRH/LDRSB/LDRSH/STRH immediate too big (%08x)",
                        immed8);

    const uint32_t upBit = (uint32_t(immed8)>>31)^1;    // 1 when >= 0
    const uint32_t splitImm = ((magnitude&0xF0)<<4) | (magnitude&0xF);
    return (1<<22) | (upBit<<23) | splitImm;
}
// Pre-indexed register offset for LDRH/LDRSB/LDRSH/STRH: bit 24 set,
// bit 23 (U) set unless `Rm` is negative, W at bit 21, |Rm| in the low
// 4 bits.
uint32_t ARMAssemblerInterface::reg_pre(int Rm, int W)
{
    const uint32_t upBit = (uint32_t(Rm)>>31)^1;    // 1 when Rm >= 0
    const uint32_t regField = (abs(Rm)&0xF);
    return (1<<24) | (upBit<<23) | ((W&1)<<21) | regField;
}
// Post-indexed register offset for LDRH/LDRSB/LDRSH/STRH: bit 23 (U) set
// unless `Rm` is negative, |Rm| in the low 4 bits.
uint32_t ARMAssemblerInterface::reg_post(int Rm)
{
    const uint32_t upBit = (uint32_t(Rm)>>31)^1;    // 1 when Rm >= 0
    const uint32_t regField = (abs(Rm)&0xF);
    return (upBit<<23) | regField;
}
}; // namespace android
@@ -0,0 +1,327 @@
/* libs/pixelflinger/codeflinger/ARMAssemblerInterface.h
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
#ifndef ANDROID_ARMASSEMBLER_INTERFACE_H
#define ANDROID_ARMASSEMBLER_INTERFACE_H
#include <stdint.h>
#include <sys/types.h>
namespace android {
// ----------------------------------------------------------------------------
// Abstract interface of an ARM code generator. It declares one pure virtual
// method per supported instruction, static helpers that build operand and
// addressing-mode bit fields, and inline convenience wrappers that forward
// to the virtual methods.
class ARMAssemblerInterface
{
public:
virtual ~ARMAssemblerInterface();
// Condition codes (passed as the `cc` argument).
enum {
EQ, NE, CS, CC, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV,
HS = CS,
LO = CC
};
// Value for the `s` argument of data-processing and multiply emitters.
enum {
S = 1
};
// Shift types (the `type` argument of reg_imm / reg_reg / reg_scale_*).
enum {
LSL, LSR, ASR, ROR
};
// Block-transfer modes (the `dir` argument of LDM / STM).
enum {
ED, FD, EA, FA,
IB, IA, DB, DA
};
// Register numbers.
enum {
R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15,
SP = R13,
LR = R14,
PC = R15
};
// Register-list bit masks: L<reg> == 1 << <reg>. LSAVED is the set
// R4-R11 plus LR.
enum {
#define LIST(rr) L##rr=1<<rr
LIST(R0), LIST(R1), LIST(R2), LIST(R3), LIST(R4), LIST(R5), LIST(R6),
LIST(R7), LIST(R8), LIST(R9), LIST(R10), LIST(R11), LIST(R12),
LIST(R13), LIST(R14), LIST(R15),
LIST(SP), LIST(LR), LIST(PC),
#undef LIST
LSAVED = LR4|LR5|LR6|LR7|LR8|LR9|LR10|LR11 | LLR
};
// -----------------------------------------------------------------------
// shifters and addressing modes
// -----------------------------------------------------------------------
// shifters...
// True when `immed` can be encoded as a rotated 8-bit immediate.
static bool isValidImmediate(uint32_t immed);
// Computes the rotation field and 8-bit value for `i`; returns 0 on success.
static int buildImmediate(uint32_t i, uint32_t& rot, uint32_t& imm);
// Encoded immediate operand; fatal if `immediate` cannot be encoded.
static uint32_t imm(uint32_t immediate);
// Register shifted by a constant amount.
static uint32_t reg_imm(int Rm, int type, uint32_t shift);
// Register with ROR type and a zero shift amount.
static uint32_t reg_rrx(int Rm);
// Register shifted by the amount held in Rs.
static uint32_t reg_reg(int Rm, int type, int Rs);
// addressing modes...
// LDR(B)/STR(B)/PLD
// (immediate and Rm can be negative, which indicates U=0)
static uint32_t immed12_pre(int32_t immed12, int W=0);
static uint32_t immed12_post(int32_t immed12);
static uint32_t reg_scale_pre(int Rm, int type=0, uint32_t shift=0, int W=0);
static uint32_t reg_scale_post(int Rm, int type=0, uint32_t shift=0);
// LDRH/LDRSB/LDRSH/STRH
// (immediate and Rm can be negative, which indicates U=0)
static uint32_t immed8_pre(int32_t immed8, int W=0);
static uint32_t immed8_post(int32_t immed8);
static uint32_t reg_pre(int Rm, int W=0);
static uint32_t reg_post(int Rm);
// -----------------------------------------------------------------------
// basic instructions & code generation
// -----------------------------------------------------------------------
// generate the code
virtual void reset() = 0;
virtual int generate(const char* name) = 0;
virtual void disassemble(const char* name) = 0;
// construct prolog and epilog
virtual void prolog() = 0;
virtual void epilog(uint32_t touched) = 0;
virtual void comment(const char* string) = 0;
// data processing...
// Opcodes for dataProcessing(), in encoding order (opAND == 0).
enum {
opAND, opEOR, opSUB, opRSB, opADD, opADC, opSBC, opRSC,
opTST, opTEQ, opCMP, opCMN, opORR, opMOV, opBIC, opMVN
};
virtual void
dataProcessing( int opcode, int cc, int s,
int Rd, int Rn,
uint32_t Op2) = 0;
// multiply...
virtual void MLA(int cc, int s,
int Rd, int Rm, int Rs, int Rn) = 0;
virtual void MUL(int cc, int s,
int Rd, int Rm, int Rs) = 0;
virtual void UMULL(int cc, int s,
int RdLo, int RdHi, int Rm, int Rs) = 0;
virtual void UMUAL(int cc, int s,
int RdLo, int RdHi, int Rm, int Rs) = 0;
virtual void SMULL(int cc, int s,
int RdLo, int RdHi, int Rm, int Rs) = 0;
virtual void SMUAL(int cc, int s,
int RdLo, int RdHi, int Rm, int Rs) = 0;
// branches...
virtual void B(int cc, uint32_t* pc) = 0;
virtual void BL(int cc, uint32_t* pc) = 0;
virtual void BX(int cc, int Rn) = 0;
virtual void label(const char* theLabel) = 0;
virtual void B(int cc, const char* label) = 0;
virtual void BL(int cc, const char* label) = 0;
// valid only after generate() has been called
virtual uint32_t* pcForLabel(const char* label) = 0;
// data transfer...
// The default offset is a pre-indexed immediate of zero.
virtual void LDR (int cc, int Rd,
int Rn, uint32_t offset = immed12_pre(0)) = 0;
virtual void LDRB(int cc, int Rd,
int Rn, uint32_t offset = immed12_pre(0)) = 0;
virtual void STR (int cc, int Rd,
int Rn, uint32_t offset = immed12_pre(0)) = 0;
virtual void STRB(int cc, int Rd,
int Rn, uint32_t offset = immed12_pre(0)) = 0;
virtual void LDRH (int cc, int Rd,
int Rn, uint32_t offset = immed8_pre(0)) = 0;
virtual void LDRSB(int cc, int Rd,
int Rn, uint32_t offset = immed8_pre(0)) = 0;
virtual void LDRSH(int cc, int Rd,
int Rn, uint32_t offset = immed8_pre(0)) = 0;
virtual void STRH (int cc, int Rd,
int Rn, uint32_t offset = immed8_pre(0)) = 0;
// block data transfer...
virtual void LDM(int cc, int dir,
int Rn, int W, uint32_t reg_list) = 0;
virtual void STM(int cc, int dir,
int Rn, int W, uint32_t reg_list) = 0;
// special...
virtual void SWP(int cc, int Rn, int Rd, int Rm) = 0;
virtual void SWPB(int cc, int Rn, int Rd, int Rm) = 0;
virtual void SWI(int cc, uint32_t comment) = 0;
// DSP instructions...
// Half-word selectors for the SMUL*/SMLA* family (the `xy` / `y`
// arguments).
enum {
// B=0, T=1
// yx
xyBB = 0, // 0000,
xyTB = 2, // 0010,
xyBT = 4, // 0100,
xyTT = 6, // 0110,
yB = 0, // 0000,
yT = 4, // 0100
};
virtual void PLD(int Rn, uint32_t offset) = 0;
virtual void CLZ(int cc, int Rd, int Rm) = 0;
virtual void QADD(int cc, int Rd, int Rm, int Rn) = 0;
virtual void QDADD(int cc, int Rd, int Rm, int Rn) = 0;
virtual void QSUB(int cc, int Rd, int Rm, int Rn) = 0;
virtual void QDSUB(int cc, int Rd, int Rm, int Rn) = 0;
virtual void SMUL(int cc, int xy,
int Rd, int Rm, int Rs) = 0;
virtual void SMULW(int cc, int y,
int Rd, int Rm, int Rs) = 0;
virtual void SMLA(int cc, int xy,
int Rd, int Rm, int Rs, int Rn) = 0;
virtual void SMLAL(int cc, int xy,
int RdHi, int RdLo, int Rs, int Rm) = 0;
virtual void SMLAW(int cc, int y,
int Rd, int Rm, int Rs, int Rn) = 0;
// byte/half word extract...
virtual void UXTB16(int cc, int Rd, int Rm, int rotate) = 0;
// -----------------------------------------------------------------------
// convenience...
// -----------------------------------------------------------------------
// Each wrapper below forwards to dataProcessing() with the matching
// opcode.
inline void
ADC(int cc, int s, int Rd, int Rn, uint32_t Op2) {
dataProcessing(opADC, cc, s, Rd, Rn, Op2);
}
inline void
ADD(int cc, int s, int Rd, int Rn, uint32_t Op2) {
dataProcessing(opADD, cc, s, Rd, Rn, Op2);
}
inline void
AND(int cc, int s, int Rd, int Rn, uint32_t Op2) {
dataProcessing(opAND, cc, s, Rd, Rn, Op2);
}
inline void
BIC(int cc, int s, int Rd, int Rn, uint32_t Op2) {
dataProcessing(opBIC, cc, s, Rd, Rn, Op2);
}
inline void
EOR(int cc, int s, int Rd, int Rn, uint32_t Op2) {
dataProcessing(opEOR, cc, s, Rd, Rn, Op2);
}
// MOV and MVN take no first operand: Rn is passed as 0.
inline void
MOV(int cc, int s, int Rd, uint32_t Op2) {
dataProcessing(opMOV, cc, s, Rd, 0, Op2);
}
inline void
MVN(int cc, int s, int Rd, uint32_t Op2) {
dataProcessing(opMVN, cc, s, Rd, 0, Op2);
}
inline void
ORR(int cc, int s, int Rd, int Rn, uint32_t Op2) {
dataProcessing(opORR, cc, s, Rd, Rn, Op2);
}
inline void
RSB(int cc, int s, int Rd, int Rn, uint32_t Op2) {
dataProcessing(opRSB, cc, s, Rd, Rn, Op2);
}
inline void
RSC(int cc, int s, int Rd, int Rn, uint32_t Op2) {
dataProcessing(opRSC, cc, s, Rd, Rn, Op2);
}
inline void
SBC(int cc, int s, int Rd, int Rn, uint32_t Op2) {
dataProcessing(opSBC, cc, s, Rd, Rn, Op2);
}
inline void
SUB(int cc, int s, int Rd, int Rn, uint32_t Op2) {
dataProcessing(opSUB, cc, s, Rd, Rn, Op2);
}
// The comparison wrappers always pass s=1 and Rd=0.
inline void
TEQ(int cc, int Rn, uint32_t Op2) {
dataProcessing(opTEQ, cc, 1, 0, Rn, Op2);
}
inline void
TST(int cc, int Rn, uint32_t Op2) {
dataProcessing(opTST, cc, 1, 0, Rn, Op2);
}
inline void
CMP(int cc, int Rn, uint32_t Op2) {
dataProcessing(opCMP, cc, 1, 0, Rn, Op2);
}
inline void
CMN(int cc, int Rn, uint32_t Op2) {
dataProcessing(opCMN, cc, 1, 0, Rn, Op2);
}
// Named variants of SMUL / SMULW / SMLA / SMLAL / SMLAW with the
// half-word selector fixed by the method name.
inline void SMULBB(int cc, int Rd, int Rm, int Rs) {
SMUL(cc, xyBB, Rd, Rm, Rs); }
inline void SMULTB(int cc, int Rd, int Rm, int Rs) {
SMUL(cc, xyTB, Rd, Rm, Rs); }
inline void SMULBT(int cc, int Rd, int Rm, int Rs) {
SMUL(cc, xyBT, Rd, Rm, Rs); }
inline void SMULTT(int cc, int Rd, int Rm, int Rs) {
SMUL(cc, xyTT, Rd, Rm, Rs); }
inline void SMULWB(int cc, int Rd, int Rm, int Rs) {
SMULW(cc, yB, Rd, Rm, Rs); }
inline void SMULWT(int cc, int Rd, int Rm, int Rs) {
SMULW(cc, yT, Rd, Rm, Rs); }
inline void
SMLABB(int cc, int Rd, int Rm, int Rs, int Rn) {
SMLA(cc, xyBB, Rd, Rm, Rs, Rn); }
inline void
SMLATB(int cc, int Rd, int Rm, int Rs, int Rn) {
SMLA(cc, xyTB, Rd, Rm, Rs, Rn); }
inline void
SMLABT(int cc, int Rd, int Rm, int Rs, int Rn) {
SMLA(cc, xyBT, Rd, Rm, Rs, Rn); }
inline void
SMLATT(int cc, int Rd, int Rm, int Rs, int Rn) {
SMLA(cc, xyTT, Rd, Rm, Rs, Rn); }
inline void
SMLALBB(int cc, int RdHi, int RdLo, int Rs, int Rm) {
SMLAL(cc, xyBB, RdHi, RdLo, Rs, Rm); }
inline void
SMLALTB(int cc, int RdHi, int RdLo, int Rs, int Rm) {
SMLAL(cc, xyTB, RdHi, RdLo, Rs, Rm); }
inline void
SMLALBT(int cc, int RdHi, int RdLo, int Rs, int Rm) {
SMLAL(cc, xyBT, RdHi, RdLo, Rs, Rm); }
inline void
SMLALTT(int cc, int RdHi, int RdLo, int Rs, int Rm) {
SMLAL(cc, xyTT, RdHi, RdLo, Rs, Rm); }
inline void
SMLAWB(int cc, int Rd, int Rm, int Rs, int Rn) {
SMLAW(cc, yB, Rd, Rm, Rs, Rn); }
inline void
SMLAWT(int cc, int Rd, int Rm, int Rs, int Rn) {
SMLAW(cc, yT, Rd, Rm, Rs, Rn); }
};
}; // namespace android
#endif //ANDROID_ARMASSEMBLER_INTERFACE_H
@@ -0,0 +1,203 @@
/* libs/pixelflinger/codeflinger/ARMAssemblerProxy.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
#include <stdint.h>
#include <sys/types.h>
#include "codeflinger/ARMAssemblerProxy.h"
namespace android {
// ----------------------------------------------------------------------------
// Creates a proxy with no target; one must be installed with setTarget()
// before any forwarding method is used.
ARMAssemblerProxy::ARMAssemblerProxy()
{
    mTarget = 0;
}
// Creates a proxy that forwards to `target`. The proxy takes ownership:
// the target is deleted by setTarget() or by the destructor.
ARMAssemblerProxy::ARMAssemblerProxy(ARMAssemblerInterface* target)
{
    mTarget = target;
}
// Destroys the owned target (deleting a null target is a no-op).
ARMAssemblerProxy::~ARMAssemblerProxy()
{
    ARMAssemblerInterface* const owned = mTarget;
    delete owned;
}
// Replaces the wrapped assembler. The previous target is deleted and the
// proxy takes ownership of `target`.
//
// Passing the target that is already installed is a no-op: without this
// check the object would be deleted and then kept as a dangling pointer.
void ARMAssemblerProxy::setTarget(ARMAssemblerInterface* target)
{
    if (target == mTarget) {
        return;
    }
    delete mTarget;
    mTarget = target;
}
void ARMAssemblerProxy::reset() {
mTarget->reset();
}
int ARMAssemblerProxy::generate(const char* name) {
return mTarget->generate(name);
}
void ARMAssemblerProxy::disassemble(const char* name) {
return mTarget->disassemble(name);
}
void ARMAssemblerProxy::prolog() {
mTarget->prolog();
}
void ARMAssemblerProxy::epilog(uint32_t touched) {
mTarget->epilog(touched);
}
void ARMAssemblerProxy::comment(const char* string) {
mTarget->comment(string);
}
void ARMAssemblerProxy::dataProcessing( int opcode, int cc, int s,
int Rd, int Rn, uint32_t Op2)
{
mTarget->dataProcessing(opcode, cc, s, Rd, Rn, Op2);
}
void ARMAssemblerProxy::MLA(int cc, int s, int Rd, int Rm, int Rs, int Rn) {
mTarget->MLA(cc, s, Rd, Rm, Rs, Rn);
}
void ARMAssemblerProxy::MUL(int cc, int s, int Rd, int Rm, int Rs) {
mTarget->MUL(cc, s, Rd, Rm, Rs);
}
void ARMAssemblerProxy::UMULL(int cc, int s,
int RdLo, int RdHi, int Rm, int Rs) {
mTarget->UMULL(cc, s, RdLo, RdHi, Rm, Rs);
}
void ARMAssemblerProxy::UMUAL(int cc, int s,
int RdLo, int RdHi, int Rm, int Rs) {
mTarget->UMUAL(cc, s, RdLo, RdHi, Rm, Rs);
}
void ARMAssemblerProxy::SMULL(int cc, int s,
int RdLo, int RdHi, int Rm, int Rs) {
mTarget->SMULL(cc, s, RdLo, RdHi, Rm, Rs);
}
// Forwards SMUAL to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::SMUAL(int cc, int s, int RdLo, int RdHi, int Rm, int Rs)
{
    mTarget->SMUAL(cc, s, RdLo, RdHi, Rm, Rs);
}
// Forwards B (address overload) to the wrapped assembler.
void ARMAssemblerProxy::B(int cc, uint32_t* pc)
{
    mTarget->B(cc, pc);
}
// Forwards BL (address overload) to the wrapped assembler.
void ARMAssemblerProxy::BL(int cc, uint32_t* pc)
{
    mTarget->BL(cc, pc);
}
// Forwards BX to the wrapped assembler.
void ARMAssemblerProxy::BX(int cc, int Rn)
{
    mTarget->BX(cc, Rn);
}
// Forwards label(theLabel) to the wrapped assembler.
void ARMAssemblerProxy::label(const char* theLabel)
{
    mTarget->label(theLabel);
}
// Forwards B (label overload) to the wrapped assembler.
void ARMAssemblerProxy::B(int cc, const char* label)
{
    mTarget->B(cc, label);
}
// Forwards BL (label overload) to the wrapped assembler.
void ARMAssemblerProxy::BL(int cc, const char* label)
{
    mTarget->BL(cc, label);
}
// Asks the wrapped assembler for the address it associates with `label`
// and returns that pointer unchanged.
uint32_t* ARMAssemblerProxy::pcForLabel(const char* label)
{
    uint32_t* const address = mTarget->pcForLabel(label);
    return address;
}
// Forwards LDR to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::LDR(int cc, int Rd, int Rn, uint32_t offset)
{
    mTarget->LDR(cc, Rd, Rn, offset);
}
// Forwards LDRB to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::LDRB(int cc, int Rd, int Rn, uint32_t offset)
{
    mTarget->LDRB(cc, Rd, Rn, offset);
}
// Forwards STR to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::STR(int cc, int Rd, int Rn, uint32_t offset)
{
    mTarget->STR(cc, Rd, Rn, offset);
}
// Forwards STRB to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::STRB(int cc, int Rd, int Rn, uint32_t offset)
{
    mTarget->STRB(cc, Rd, Rn, offset);
}
// Forwards LDRH to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::LDRH(int cc, int Rd, int Rn, uint32_t offset)
{
    mTarget->LDRH(cc, Rd, Rn, offset);
}
// Forwards LDRSB to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::LDRSB(int cc, int Rd, int Rn, uint32_t offset)
{
    mTarget->LDRSB(cc, Rd, Rn, offset);
}
// Forwards LDRSH to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::LDRSH(int cc, int Rd, int Rn, uint32_t offset)
{
    mTarget->LDRSH(cc, Rd, Rn, offset);
}
// Forwards STRH to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::STRH(int cc, int Rd, int Rn, uint32_t offset)
{
    mTarget->STRH(cc, Rd, Rn, offset);
}
// Forwards LDM to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::LDM(int cc, int dir, int Rn, int W, uint32_t reg_list)
{
    mTarget->LDM(cc, dir, Rn, W, reg_list);
}
// Forwards STM to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::STM(int cc, int dir, int Rn, int W, uint32_t reg_list)
{
    mTarget->STM(cc, dir, Rn, W, reg_list);
}
// Forwards SWP to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::SWP(int cc, int Rn, int Rd, int Rm)
{
    mTarget->SWP(cc, Rn, Rd, Rm);
}
// Forwards SWPB to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::SWPB(int cc, int Rn, int Rd, int Rm)
{
    mTarget->SWPB(cc, Rn, Rd, Rm);
}
// Forwards SWI to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::SWI(int cc, uint32_t comment)
{
    mTarget->SWI(cc, comment);
}
// Forwards PLD to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::PLD(int Rn, uint32_t offset)
{
    mTarget->PLD(Rn, offset);
}
// Forwards CLZ to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::CLZ(int cc, int Rd, int Rm)
{
    mTarget->CLZ(cc, Rd, Rm);
}
// Forwards QADD to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::QADD(int cc, int Rd, int Rm, int Rn)
{
    mTarget->QADD(cc, Rd, Rm, Rn);
}
// Forwards QDADD to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::QDADD(int cc, int Rd, int Rm, int Rn)
{
    mTarget->QDADD(cc, Rd, Rm, Rn);
}
// Forwards QSUB to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::QSUB(int cc, int Rd, int Rm, int Rn)
{
    mTarget->QSUB(cc, Rd, Rm, Rn);
}
// Forwards QDSUB to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::QDSUB(int cc, int Rd, int Rm, int Rn)
{
    mTarget->QDSUB(cc, Rd, Rm, Rn);
}
// Forwards SMUL to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::SMUL(int cc, int xy, int Rd, int Rm, int Rs)
{
    mTarget->SMUL(cc, xy, Rd, Rm, Rs);
}
// Forwards SMULW to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::SMULW(int cc, int y, int Rd, int Rm, int Rs)
{
    mTarget->SMULW(cc, y, Rd, Rm, Rs);
}
// Forwards SMLA to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::SMLA(int cc, int xy, int Rd, int Rm, int Rs, int Rn)
{
    mTarget->SMLA(cc, xy, Rd, Rm, Rs, Rn);
}
// Forwards SMLAL to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::SMLAL(int cc, int xy, int RdHi, int RdLo, int Rs, int Rm)
{
    mTarget->SMLAL(cc, xy, RdHi, RdLo, Rs, Rm);
}
// Forwards SMLAW to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::SMLAW(int cc, int y, int Rd, int Rm, int Rs, int Rn)
{
    mTarget->SMLAW(cc, y, Rd, Rm, Rs, Rn);
}
// Forwards UXTB16 to the wrapped assembler, arguments unchanged.
void ARMAssemblerProxy::UXTB16(int cc, int Rd, int Rm, int rotate)
{
    mTarget->UXTB16(cc, Rd, Rm, rotate);
}
}; // namespace android
@@ -0,0 +1,125 @@
/* libs/pixelflinger/codeflinger/ARMAssemblerProxy.h
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
#ifndef ANDROID_ARMASSEMBLER_PROXY_H
#define ANDROID_ARMASSEMBLER_PROXY_H
#include <stdint.h>
#include <sys/types.h>
#include "codeflinger/ARMAssemblerInterface.h"
namespace android {
// ----------------------------------------------------------------------------
// ARMAssemblerInterface implementation that holds a pointer to another
// ARMAssemblerInterface (the "target") and forwards every call below to it.
// The proxy owns the target: it is deleted by the destructor and whenever
// setTarget() installs a replacement.
class ARMAssemblerProxy : public ARMAssemblerInterface
{
public:
// ARMAssemblerProxy takes ownership of the target.
// The default constructor leaves the proxy without a target; the forwarding
// methods dereference the target unconditionally, so one must be installed
// with setTarget() before they are used.
ARMAssemblerProxy();
ARMAssemblerProxy(ARMAssemblerInterface* target);
virtual ~ARMAssemblerProxy();
// Deletes the current target and installs `target` in its place.
void setTarget(ARMAssemblerInterface* target);
// Everything from here down is forwarded to the target with its arguments
// unchanged.
virtual void reset();
virtual int generate(const char* name);
virtual void disassemble(const char* name);
virtual void prolog();
virtual void epilog(uint32_t touched);
virtual void comment(const char* string);
virtual void dataProcessing(int opcode, int cc, int s,
int Rd, int Rn,
uint32_t Op2);
virtual void MLA(int cc, int s,
int Rd, int Rm, int Rs, int Rn);
virtual void MUL(int cc, int s,
int Rd, int Rm, int Rs);
virtual void UMULL(int cc, int s,
int RdLo, int RdHi, int Rm, int Rs);
virtual void UMUAL(int cc, int s,
int RdLo, int RdHi, int Rm, int Rs);
virtual void SMULL(int cc, int s,
int RdLo, int RdHi, int Rm, int Rs);
virtual void SMUAL(int cc, int s,
int RdLo, int RdHi, int Rm, int Rs);
virtual void B(int cc, uint32_t* pc);
virtual void BL(int cc, uint32_t* pc);
virtual void BX(int cc, int Rn);
virtual void label(const char* theLabel);
virtual void B(int cc, const char* label);
virtual void BL(int cc, const char* label);
// NOTE(review): declared without `virtual` here, unlike its neighbours;
// whether it overrides a virtual in ARMAssemblerInterface is not visible
// from this header -- confirm.
uint32_t* pcForLabel(const char* label);
// The load/store wrappers default `offset` to immed12_pre(0) or
// immed8_pre(0).
virtual void LDR (int cc, int Rd,
int Rn, uint32_t offset = immed12_pre(0));
virtual void LDRB(int cc, int Rd,
int Rn, uint32_t offset = immed12_pre(0));
virtual void STR (int cc, int Rd,
int Rn, uint32_t offset = immed12_pre(0));
virtual void STRB(int cc, int Rd,
int Rn, uint32_t offset = immed12_pre(0));
virtual void LDRH (int cc, int Rd,
int Rn, uint32_t offset = immed8_pre(0));
virtual void LDRSB(int cc, int Rd,
int Rn, uint32_t offset = immed8_pre(0));
virtual void LDRSH(int cc, int Rd,
int Rn, uint32_t offset = immed8_pre(0));
virtual void STRH (int cc, int Rd,
int Rn, uint32_t offset = immed8_pre(0));
virtual void LDM(int cc, int dir,
int Rn, int W, uint32_t reg_list);
virtual void STM(int cc, int dir,
int Rn, int W, uint32_t reg_list);
virtual void SWP(int cc, int Rn, int Rd, int Rm);
virtual void SWPB(int cc, int Rn, int Rd, int Rm);
virtual void SWI(int cc, uint32_t comment);
virtual void PLD(int Rn, uint32_t offset);
virtual void CLZ(int cc, int Rd, int Rm);
virtual void QADD(int cc, int Rd, int Rm, int Rn);
virtual void QDADD(int cc, int Rd, int Rm, int Rn);
virtual void QSUB(int cc, int Rd, int Rm, int Rn);
virtual void QDSUB(int cc, int Rd, int Rm, int Rn);
virtual void SMUL(int cc, int xy,
int Rd, int Rm, int Rs);
virtual void SMULW(int cc, int y,
int Rd, int Rm, int Rs);
virtual void SMLA(int cc, int xy,
int Rd, int Rm, int Rs, int Rn);
virtual void SMLAL(int cc, int xy,
int RdHi, int RdLo, int Rs, int Rm);
virtual void SMLAW(int cc, int y,
int Rd, int Rm, int Rs, int Rn);
virtual void UXTB16(int cc, int Rd, int Rm, int rotate);
private:
// Owned target; null until one is supplied.
ARMAssemblerInterface* mTarget;
};
}; // namespace android
#endif //ANDROID_ARMASSEMBLER_PROXY_H
@@ -0,0 +1,173 @@
/* libs/pixelflinger/codeflinger/CodeCache.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/mman.h>
#include <cutils/log.h>
#include <cutils/atomic.h>
#include "codeflinger/CodeCache.h"
namespace android {
// ----------------------------------------------------------------------------
#if defined(__arm__)
#include <unistd.h>
#include <errno.h>
#endif
// ----------------------------------------------------------------------------
// Allocates `size` bytes from the shared code mspace and asks for the pages
// covering them to be made executable. If either step fails mBase ends up
// NULL, which size() reports as NO_MEMORY.
//
// NOTE(review): the reference count starts at 1 while decStrong() deletes
// only when the value returned by android_atomic_dec() is 1. If sp<> calls
// incStrong() when it adopts a new object, one reference is never released
// -- confirm against the sp<> implementation.
Assembly::Assembly(size_t size)
    : mCount(1), mSize(0)
{
    void* const block = mspace_malloc(getMspace(), size);
    mBase = (uint32_t*)block;
    mSize = size;
    ensureMbaseExecutable();
}
// Hands the code buffer back to the shared mspace.
Assembly::~Assembly()
{
    mspace const heap = getMspace();
    mspace_free(heap, mBase);
}
// sp<> protocol: atomically adds one reference. The id argument is unused.
void Assembly::incStrong(const void* /*id*/) const
{
    android_atomic_inc(&mCount);
}
// sp<> protocol: atomically drops one reference and destroys the object when
// android_atomic_dec() reports 1 (presumably the count before the decrement,
// i.e. this was the last reference -- confirm against cutils/atomic.h).
// The id argument is unused.
void Assembly::decStrong(const void* /*id*/) const
{
    const int32_t reported = android_atomic_dec(&mCount);
    if (reported != 1) {
        return;
    }
    delete this;
}
// Returns the buffer size in bytes, or NO_MEMORY when there is no buffer.
ssize_t Assembly::size() const
{
    return mBase ? ssize_t(mSize) : ssize_t(NO_MEMORY);
}
// Returns the start of the code buffer (NULL when there is none).
uint32_t* Assembly::base() const
{
    uint32_t* const start = mBase;
    return start;
}
ssize_t Assembly::resize(size_t newSize)
{
mBase = (uint32_t*)mspace_realloc(getMspace(), mBase, newSize);
mSize = newSize;
ensureMbaseExecutable();
return size();
}
// Returns the mspace all Assembly buffers are carved from. It is created on
// the first call and kept for the lifetime of the process.
// The two sizes passed (2 KiB and 1 MiB) are presumably the starting and
// maximum capacity -- confirm against cutils/mspace.h.
mspace Assembly::getMspace()
{
    static mspace sCodeHeap =
            create_contiguous_mspace(2 * 1024, 1024 * 1024, /*locked=*/ false);
    return sCodeHeap;
}
// Marks every page touched by [mBase, mBase + mSize) as read/write/execute.
// If mprotect() refuses, the buffer is released and mBase is set to NULL so
// that size() reports NO_MEMORY. Does nothing when there is no buffer.
void Assembly::ensureMbaseExecutable()
{
    if (!mBase) {
        return;
    }

    const long pageSize = sysconf(_SC_PAGESIZE);
    const long alignMask = ~(pageSize - 1); // assumes pageSize is a power of 2

    // mprotect() wants a page-aligned start, so round mBase down and grow
    // the length by the distance that was rounded away.
    uint32_t* const firstPage = (uint32_t*) (((uintptr_t) mBase) & alignMask);
    const size_t span = (mBase - firstPage) * sizeof(uint32_t) + mSize;

    if (mprotect(firstPage, span, PROT_READ | PROT_WRITE | PROT_EXEC) == 0) {
        return;
    }

    mspace_free(getMspace(), mBase);
    mBase = NULL;
}
// ----------------------------------------------------------------------------
// Creates an empty cache with a budget of `size` bytes of generated code.
//
// mWhen, the counter used to timestamp entries for LRU eviction, is now
// initialised explicitly; it used to be left indeterminate, which was only
// harmless for instances with static storage duration.
CodeCache::CodeCache(size_t size)
    : mWhen(0), mCacheSize(size), mCacheInUse(0)
{
    pthread_mutex_init(&mLock, 0);
}
// Releases the mutex; the cached entries go away with mCacheData.
CodeCache::~CodeCache()
{
    pthread_mutex_destroy(&mLock);
}
// Looks up the assembly stored under `keyBase`. On a hit the entry's
// timestamp is refreshed (so it becomes the most recently used) and the
// assembly is returned; on a miss an empty sp<> is returned.
// The whole operation runs with mLock held.
sp<Assembly> CodeCache::lookup(const AssemblyKeyBase& keyBase) const
{
    sp<Assembly> found;

    pthread_mutex_lock(&mLock);
    const ssize_t slot = mCacheData.indexOfKey(key_t(keyBase));
    if (slot >= 0) {
        const cache_entry_t& hit = mCacheData.valueAt(slot);
        hit.when = mWhen++;
        found = hit.entry;
    }
    pthread_mutex_unlock(&mLock);

    return found;
}
// Stores `assembly` under `keyBase`, first evicting least-recently-used
// entries until the new assembly fits in the byte budget. The whole
// operation runs with mLock held.
//
// The key is stored by address (see key_t), so `keyBase` has to stay alive
// for as long as the entry is cached.
//
// Returns the result of cacheflush() on ARM builds when the entry was added;
// otherwise the value returned by KeyedVector::add().
// NOTE(review): on non-ARM builds a successful add therefore returns the
// (non-negative) value from add() rather than 0 -- confirm callers cope.
int CodeCache::cache( const AssemblyKeyBase& keyBase,
        const sp<Assembly>& assembly)
{
    pthread_mutex_lock(&mLock);
    const ssize_t assemblySize = assembly->size();
    while (mCacheInUse + assemblySize > mCacheSize) {
        const size_t count = mCacheData.size();
        if (count == 0) {
            // Nothing left to evict: the assembly alone exceeds the budget.
            // Without this check valueAt(0) was called on an empty vector
            // and the loop could never terminate.
            break;
        }
        // evict the LRU
        size_t lru = 0;
        for (size_t i=1 ; i<count ; i++) {
            const cache_entry_t& e = mCacheData.valueAt(i);
            if (e.when < mCacheData.valueAt(lru).when) {
                lru = i;
            }
        }
        const cache_entry_t& e = mCacheData.valueAt(lru);
        mCacheInUse -= e.entry->size();
        mCacheData.removeItemsAt(lru);
    }
    ssize_t err = mCacheData.add(key_t(keyBase), cache_entry_t(assembly, mWhen));
    if (err >= 0) {
        mCacheInUse += assemblySize;
        mWhen++;
        // synchronize caches...
#if defined(__arm__)
        const long base = long(assembly->base());
        const long curr = base + long(assembly->size());
        err = cacheflush(base, curr, 0);
        LOGE_IF(err, "__ARM_NR_cacheflush error %s\n",
                strerror(errno));
#endif
    }
    pthread_mutex_unlock(&mLock);
    return err;
}
// ----------------------------------------------------------------------------
}; // namespace android
@@ -0,0 +1,137 @@
/* libs/pixelflinger/codeflinger/CodeCache.h
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
#ifndef ANDROID_CODECACHE_H
#define ANDROID_CODECACHE_H
#include <stdint.h>
#include <pthread.h>
#include <sys/types.h>
#include <cutils/mspace.h>
#include "tinyutils/KeyedVector.h"
#include "tinyutils/smartpointer.h"
namespace android {
// ----------------------------------------------------------------------------
// Type-erased cache key: anything that can order itself against another key
// through compare_type().
class AssemblyKeyBase
{
public:
    virtual ~AssemblyKeyBase() { }

    // Compares this key with `key` and returns an int describing the
    // ordering (the sign convention is defined by the implementer).
    virtual int compare_type(const AssemblyKeyBase& key) const = 0;
};
// Concrete key wrapping a value of type T. Two keys are ordered by calling
// android::compare_type() on the wrapped values.
template <typename T>
class AssemblyKey : public AssemblyKeyBase
{
public:
    AssemblyKey(const T& rhs) : mKey(rhs) { }

    // `key` is downcast without a check: it must really be an
    // AssemblyKey<T> with the same T.
    virtual int compare_type(const AssemblyKeyBase& key) const {
        const AssemblyKey& other = static_cast<const AssemblyKey&>(key);
        return android::compare_type(mKey, other.mKey);
    }

private:
    T mKey;
};
// ----------------------------------------------------------------------------
// A reference-counted buffer of generated code, allocated from a shared
// mspace and made executable. Designed to be held through sp<>.
//
// The object owns mBase and frees it in its destructor, so it must not be
// copied (a copy would free the same buffer twice). Copying is disabled by
// declaring the copy operations private and leaving them undefined.
class Assembly
{
public:
    // Allocates a buffer of `size` bytes; on failure size() returns
    // NO_MEMORY and base() returns NULL.
    Assembly(size_t size);
    virtual ~Assembly();

    // Buffer size in bytes, or NO_MEMORY when there is no buffer.
    ssize_t size() const;
    // Start of the buffer (NULL when there is none).
    uint32_t* base() const;
    // Resizes the buffer; returns the same value size() would.
    ssize_t resize(size_t size);

    // protocol for sp<>
    void incStrong(const void* id) const;
    void decStrong(const void* id) const;
    typedef void weakref_type;

private:
    // Not copyable: declared, never defined.
    Assembly(const Assembly&);
    Assembly& operator=(const Assembly&);

    // Shared allocator for all code buffers.
    static mspace getMspace();
    // Applies read/write/execute protection to the pages holding mBase.
    void ensureMbaseExecutable();

    mutable int32_t mCount;   // reference count
    uint32_t* mBase;          // code buffer, NULL when unavailable
    size_t mSize;             // buffer size in bytes
};
// ----------------------------------------------------------------------------
// Cache of generated assemblies indexed by AssemblyKeyBase, bounded by a byte
// budget, with least-recently-used eviction. lookup() and cache() serialise
// on an internal mutex.
class CodeCache
{
public:
    // pretty simple cache API...
    // `size` is the byte budget for cached code.
    CodeCache(size_t size);
    ~CodeCache();

    // Returns the assembly stored under `key`, or an empty sp<> on a miss.
    sp<Assembly> lookup(const AssemblyKeyBase& key) const;

    // Stores `assembly` under `key`. The key is kept by address, so it must
    // outlive the cache entry.
    int cache( const AssemblyKeyBase& key,
            const sp<Assembly>& assembly);

private:
    // nothing to see here...
    struct cache_entry_t {
        // `when` is now zeroed; it used to be left indeterminate.
        inline cache_entry_t() : when(0) { }
        inline cache_entry_t(const sp<Assembly>& a, int64_t w)
            : entry(a), when(w) { }
        sp<Assembly> entry;
        // Timestamp of the last use; mutable so that a lookup can refresh
        // it through a const reference.
        mutable int64_t when;
    };

    // Thin wrapper that stores a pointer to the caller's key (no copy).
    class key_t {
        friend int compare_type(
            const key_value_pair_t<key_t, cache_entry_t>&,
            const key_value_pair_t<key_t, cache_entry_t>&);
        const AssemblyKeyBase* mKey;
    public:
        // mKey is now nulled; it used to be left indeterminate.
        key_t() : mKey(0) { }
        key_t(const AssemblyKeyBase& k) : mKey(&k) { }
    };

    mutable pthread_mutex_t mLock;
    mutable int64_t mWhen;          // monotonically increasing use counter
    size_t mCacheSize;              // byte budget
    size_t mCacheInUse;             // bytes currently accounted for
    KeyedVector<key_t, cache_entry_t> mCacheData;

    friend int compare_type(
        const key_value_pair_t<key_t, cache_entry_t>&,
        const key_value_pair_t<key_t, cache_entry_t>&);
};
// KeyedVector uses compare_type(), which is more efficient, than
// just using operator < ()
inline int compare_type(
const key_value_pair_t<CodeCache::key_t, CodeCache::cache_entry_t>& lhs,
const key_value_pair_t<CodeCache::key_t, CodeCache::cache_entry_t>& rhs)
{
return lhs.key.mKey->compare_type(*(rhs.key.mKey));
}
// ----------------------------------------------------------------------------
}; // namespace android
#endif //ANDROID_CODECACHE_H
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,554 @@
/* libs/pixelflinger/codeflinger/GGLAssembler.h
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
#ifndef ANDROID_GGLASSEMBLER_H
#define ANDROID_GGLASSEMBLER_H
#include <stdint.h>
#include <sys/types.h>
#include <private/pixelflinger/ggl_context.h>
#include "codeflinger/ARMAssemblerProxy.h"
namespace android {
// ----------------------------------------------------------------------------
// Emits an LDR of REG from the address held in mBuilderContext.Rctx plus
// GGL_OFFSETOF(FIELD). Only usable where LDR, AL, immed12_pre and
// mBuilderContext are in scope (i.e. inside GGLAssembler members).
#define CONTEXT_LOAD(REG, FIELD) \
LDR(AL, REG, mBuilderContext.Rctx, immed12_pre(GGL_OFFSETOF(FIELD)))
// Store counterpart of CONTEXT_LOAD: emits an STR of REG to the same
// Rctx-relative address.
#define CONTEXT_STORE(REG, FIELD) \
STR(AL, REG, mBuilderContext.Rctx, immed12_pre(GGL_OFFSETOF(FIELD)))
// Bookkeeping for the registers used by generated code. The allocator owns
// one RegisterFile; Scratch and Spill are scoped helpers built on top of a
// RegisterFile.
class RegisterAllocator
{
public:
    class RegisterFile;

    RegisterFile& registerFile();
    int reserveReg(int reg);
    int obtainReg();
    void recycleReg(int reg);
    void reset();

    // Tracks register state in bit masks (mRegs, mTouched) plus a status
    // word.
    class RegisterFile
    {
    public:
        RegisterFile();
        RegisterFile(const RegisterFile& rhs);
        ~RegisterFile();

        void reset();

        bool operator == (const RegisterFile& rhs) const;
        bool operator != (const RegisterFile& rhs) const {
            return !(*this == rhs);
        }

        int reserve(int reg);
        void reserveSeveral(uint32_t regMask);

        void recycle(int reg);
        void recycleSeveral(uint32_t regMask);

        int obtain();
        inline int isUsed(int reg) const;

        bool hasFreeRegs() const;
        int countFreeRegs() const;

        uint32_t touched() const;
        inline uint32_t status() const { return mStatus; }

        enum {
            OUT_OF_REGISTERS = 0x1
        };

    private:
        uint32_t mRegs;
        uint32_t mTouched;
        uint32_t mStatus;
    };

    // Scoped set of temporaries: every register obtained through a Scratch
    // and not recycled by hand is recycled when the Scratch is destroyed.
    class Scratch
    {
    public:
        Scratch(RegisterFile& regFile)
            : mRegFile(regFile), mScratch(0) {
        }
        ~Scratch() {
            mRegFile.recycleSeveral(mScratch);
        }

        // Obtains a register from the file and remembers it in the mask.
        int obtain() {
            const int reg = mRegFile.obtain();
            mScratch |= 1<<reg;
            return reg;
        }
        // Gives `reg` back to the file and forgets it.
        void recycle(int reg) {
            mRegFile.recycle(reg);
            mScratch &= ~(1<<reg);
        }
        // True when `reg` is currently held by this Scratch.
        bool isUsed(int reg) {
            return (mScratch & (1<<reg)) != 0;
        }
        int countFreeRegs() {
            return mRegFile.countFreeRegs();
        }

    private:
        RegisterFile& mRegFile;
        uint32_t mScratch;      // bit mask of registers held
    };

    // Scoped spill: the constructor emits code storing the registers in
    // `reglist` relative to SP and marks them recycled; the destructor emits
    // the matching loads and reserves them again. A single register uses
    // STR/LDR, several use STM/LDM.
    class Spill
    {
    public:
        Spill(RegisterFile& regFile, ARMAssemblerInterface& gen, uint32_t reglist)
            : mRegFile(regFile), mGen(gen), mRegList(reglist), mCount(0)
        {
            if (!reglist) {
                return;
            }
            // number of registers named by the mask
            const int count = __builtin_popcount(reglist);
            if (count == 1) {
                const int reg = 31 - __builtin_clz(mRegList);
                mGen.STR(mGen.AL, reg, mGen.SP, mGen.immed12_pre(-4, 1));
            } else {
                mGen.STM(mGen.AL, mGen.DB, mGen.SP, 1, mRegList);
            }
            mRegFile.recycleSeveral(mRegList);
            mCount = count;
        }
        ~Spill() {
            if (!mRegList) {
                return;
            }
            if (mCount == 1) {
                const int reg = 31 - __builtin_clz(mRegList);
                mGen.LDR(mGen.AL, reg, mGen.SP, mGen.immed12_post(4));
            } else {
                mGen.LDM(mGen.AL, mGen.IA, mGen.SP, 1, mRegList);
            }
            mRegFile.reserveSeveral(mRegList);
        }

    private:
        RegisterFile& mRegFile;
        ARMAssemblerInterface& mGen;
        uint32_t mRegList;      // bit mask of spilled registers
        int mCount;             // number of bits set in mRegList
    };

private:
    RegisterFile mRegs;
};
// ----------------------------------------------------------------------------
// Scanline code generator. It emits instructions through the
// ARMAssemblerProxy base (which forwards to the target assembler passed to
// the constructor) and manages registers through the RegisterAllocator base.
// The public part declares the entry points and the small value types that
// describe registers; the private part declares the per-stage builders and
// the state they share.
class GGLAssembler : public ARMAssemblerProxy, public RegisterAllocator
{
public:
GGLAssembler(ARMAssemblerInterface* target);
virtual ~GGLAssembler();
// Both always return 0 (marked XXX by the original author).
uint32_t* base() const { return 0; } // XXX
uint32_t* pc() const { return 0; } // XXX
void reset(int opt_level);
virtual void prolog();
virtual void epilog(uint32_t touched);
// generate scanline code for given needs
int scanline(const needs_t& needs, context_t const* c);
int scanline_core(const needs_t& needs, context_t const* c);
// Flag bits stored in reg_t::flags.
enum {
CLEAR_LO = 0x0001,
CLEAR_HI = 0x0002,
CORRUPTIBLE = 0x0004,
FIRST = 0x0008
};
enum { //load/store flags
WRITE_BACK = 0x0001
};
// A register number plus flag bits. A default-constructed reg_t has
// reg == -1 and no flags.
struct reg_t {
reg_t() : reg(-1), flags(0) {
}
reg_t(int r, int f=0)
: reg(r), flags(f) {
}
void setTo(int r, int f=0) {
reg=r; flags=f;
}
int reg;
uint16_t flags;
};
// reg_t with a size `s` (defaults to 32 in the value constructor and in
// setTo, 0 when default-constructed).
struct integer_t : public reg_t {
integer_t() : reg_t(), s(0) {
}
integer_t(int r, int sz=32, int f=0)
: reg_t(r, f), s(sz) {
}
void setTo(int r, int sz=32, int f=0) {
reg_t::setTo(r, f); s=sz;
}
int8_t s;
inline int size() const { return s; }
};
// reg_t together with a copy of the GGLFormat describing its contents.
// The default constructor zero-fills the format.
struct pixel_t : public reg_t {
pixel_t() : reg_t() {
memset(&format, 0, sizeof(GGLFormat));
}
pixel_t(int r, const GGLFormat* fmt, int f=0)
: reg_t(r, f), format(*fmt) {
}
void setTo(int r, const GGLFormat* fmt, int f=0) {
reg_t::setTo(r, f); format = *fmt;
}
GGLFormat format;
// Per-component accessors read format.c[c]: hi/low return its h/l fields,
// component_size is their difference, and mask is that many one-bits
// shifted left by low(c).
inline int hi(int c) const { return format.c[c].h; }
inline int low(int c) const { return format.c[c].l; }
inline int mask(int c) const { return ((1<<size(c))-1) << low(c); }
// Whole-pixel size: format.size multiplied by 8.
inline int size() const { return format.size*8; }
inline int size(int c) const { return component_size(c); }
inline int component_size(int c) const { return hi(c) - low(c); }
};
// reg_t with two bounds h and l; size() is h - l.
struct component_t : public reg_t {
component_t() : reg_t(), h(0), l(0) {
}
component_t(int r, int f=0)
: reg_t(r, f), h(0), l(0) {
}
component_t(int r, int lo, int hi, int f=0)
: reg_t(r, f), h(hi), l(lo) {
}
// From an integer_t: h takes the integer's size and l is 0.
explicit component_t(const integer_t& rhs)
: reg_t(rhs.reg, rhs.flags), h(rhs.s), l(0) {
}
// From one component of a pixel: l/h come from the pixel format and
// CLEAR_LO|CLEAR_HI are added to the pixel's flags.
explicit component_t(const pixel_t& rhs, int component) {
setTo( rhs.reg,
rhs.format.c[component].l,
rhs.format.c[component].h,
rhs.flags|CLEAR_LO|CLEAR_HI);
}
void setTo(int r, int lo=0, int hi=0, int f=0) {
reg_t::setTo(r, f); h=hi; l=lo;
}
int8_t h;
int8_t l;
inline int size() const { return h-l; }
};
// reg_t with a `size` field (0 when default-constructed).
struct pointer_t : public reg_t {
pointer_t() : reg_t(), size(0) {
}
pointer_t(int r, int s, int f=0)
: reg_t(r, f), size(s) {
}
void setTo(int r, int s, int f=0) {
reg_t::setTo(r, f); size=s;
}
int8_t size;
};
private:
// Registers for one texture unit's s and t values plus a pointer.
struct tex_coord_t {
reg_t s;
reg_t t;
pointer_t ptr;
};
// Registers and flags shared by the builders while one scanline routine
// is being generated.
struct fragment_parts_t {
uint32_t packed : 1;
uint32_t reload : 2;
uint32_t iterated_packed : 1;
pixel_t iterated;
pointer_t cbPtr;
pointer_t covPtr;
reg_t count;
reg_t argb[4];
reg_t argb_dx[4];
reg_t z;
reg_t dither;
pixel_t texel[GGL_TEXTURE_UNIT_COUNT];
tex_coord_t coords[GGL_TEXTURE_UNIT_COUNT];
};
// Settings kept for a single texture unit.
struct texture_unit_t {
int format_idx;
GGLFormat format;
int bits;
int swrap;
int twrap;
int env;
int pot;
int linear;
uint8_t mask;
uint8_t replaced;
};
// All texture units plus masks kept alongside them.
struct texture_machine_t {
texture_unit_t tmu[GGL_TEXTURE_UNIT_COUNT];
uint8_t mask;
uint8_t replaced;
uint8_t directTexture;
uint8_t activeUnits;
};
// One-bit flags kept per component (see mInfo[4]).
struct component_info_t {
bool masked : 1;
bool inDest : 1;
bool needed : 1;
bool replaced : 1;
bool iterated : 1;
bool smooth : 1;
bool blend : 1;
bool fog : 1;
};
// The context and needs being compiled, plus Rctx, the register used as
// base address by CONTEXT_LOAD / CONTEXT_STORE.
struct builder_context_t {
context_t const* c;
needs_t needs;
int Rctx;
};
// If `r` is not flagged CORRUPTIBLE, points it at a register freshly
// obtained from `regs` and sets the flag. Only the register number is
// changed here; no instruction is emitted.
template <typename T>
void modify(T& r, Scratch& regs)
{
if (!(r.flags & CORRUPTIBLE)) {
r.reg = regs.obtain();
r.flags |= CORRUPTIBLE;
}
}
// helpers
void base_offset(const pointer_t& d, const pointer_t& b, const reg_t& o);
// texture environment
void modulate( component_t& dest,
const component_t& incoming,
const pixel_t& texel, int component);
void decal( component_t& dest,
const component_t& incoming,
const pixel_t& texel, int component);
void blend( component_t& dest,
const component_t& incoming,
const pixel_t& texel, int component, int tmu);
void add( component_t& dest,
const component_t& incoming,
const pixel_t& texel, int component);
// load/store stuff
void store(const pointer_t& addr, const pixel_t& src, uint32_t flags=0);
void load(const pointer_t& addr, const pixel_t& dest, uint32_t flags=0);
void extract(integer_t& d, const pixel_t& s, int component);
void extract(component_t& d, const pixel_t& s, int component);
void extract(integer_t& d, int s, int h, int l, int bits=32);
void expand(integer_t& d, const integer_t& s, int dbits);
void expand(integer_t& d, const component_t& s, int dbits);
void expand(component_t& d, const component_t& s, int dbits);
void downshift(pixel_t& d, int component, component_t s, const reg_t& dither);
void mul_factor( component_t& d,
const integer_t& v,
const integer_t& f);
void mul_factor_add( component_t& d,
const integer_t& v,
const integer_t& f,
const component_t& a);
void component_add( component_t& d,
const integer_t& dst,
const integer_t& src);
void component_sat( const component_t& v);
// builders for the individual stages of the generated routine
void build_scanline_prolog( fragment_parts_t& parts,
const needs_t& needs);
void build_smooth_shade(const fragment_parts_t& parts);
void build_component( pixel_t& pixel,
const fragment_parts_t& parts,
int component,
Scratch& global_scratches);
void build_incoming_component(
component_t& temp,
int dst_size,
const fragment_parts_t& parts,
int component,
Scratch& scratches,
Scratch& global_scratches);
void init_iterated_color(fragment_parts_t& parts, const reg_t& x);
void build_iterated_color( component_t& fragment,
const fragment_parts_t& parts,
int component,
Scratch& regs);
void decodeLogicOpNeeds(const needs_t& needs);
void decodeTMUNeeds(const needs_t& needs, context_t const* c);
void init_textures( tex_coord_t* coords,
const reg_t& x,
const reg_t& y);
void build_textures( fragment_parts_t& parts,
Scratch& regs);
// The four filterN declarations share one signature; N presumably is the
// texel size in bits -- confirm in the implementation.
void filter8( const fragment_parts_t& parts,
pixel_t& texel, const texture_unit_t& tmu,
int U, int V, pointer_t& txPtr,
int FRAC_BITS);
void filter16( const fragment_parts_t& parts,
pixel_t& texel, const texture_unit_t& tmu,
int U, int V, pointer_t& txPtr,
int FRAC_BITS);
void filter24( const fragment_parts_t& parts,
pixel_t& texel, const texture_unit_t& tmu,
int U, int V, pointer_t& txPtr,
int FRAC_BITS);
void filter32( const fragment_parts_t& parts,
pixel_t& texel, const texture_unit_t& tmu,
int U, int V, pointer_t& txPtr,
int FRAC_BITS);
void build_texture_environment( component_t& fragment,
const fragment_parts_t& parts,
int component,
Scratch& regs);
void wrapping( int d,
int coord, int size,
int tx_wrap, int tx_linear);
void build_fog( component_t& temp,
int component,
Scratch& parent_scratches);
void build_blending( component_t& in_out,
const pixel_t& pixel,
int component,
Scratch& parent_scratches);
void build_blend_factor(
integer_t& factor, int f, int component,
const pixel_t& dst_pixel,
integer_t& fragment,
integer_t& fb,
Scratch& scratches);
void build_blendFOneMinusF( component_t& temp,
const integer_t& factor,
const integer_t& fragment,
const integer_t& fb);
void build_blendOneMinusFF( component_t& temp,
const integer_t& factor,
const integer_t& fragment,
const integer_t& fb);
void build_coverage_application(component_t& fragment,
const fragment_parts_t& parts,
Scratch& regs);
void build_alpha_test(component_t& fragment, const fragment_parts_t& parts);
// Bit values; presumably combined into the `mask` argument of
// build_depth_test() -- confirm in the implementation.
enum { Z_TEST=1, Z_WRITE=2 };
void build_depth_test(const fragment_parts_t& parts, uint32_t mask);
void build_iterate_z(const fragment_parts_t& parts);
void build_iterate_f(const fragment_parts_t& parts);
void build_iterate_texture_coordinates(const fragment_parts_t& parts);
void build_logic_op(pixel_t& pixel, Scratch& regs);
void build_masking(pixel_t& pixel, Scratch& regs);
void build_and_immediate(int d, int s, uint32_t mask, int bits);
bool isAlphaSourceNeeded() const;
// Bit values; presumably what blending_codes() returns -- confirm.
enum {
FACTOR_SRC=1, FACTOR_DST=2, BLEND_SRC=4, BLEND_DST=8
};
// Bit values; presumably what mLogicOp holds -- confirm.
enum {
LOGIC_OP=1, LOGIC_OP_SRC=2, LOGIC_OP_DST=4
};
static int blending_codes(int fs, int fd);
// State shared by the builders; none of it is initialised in this header.
builder_context_t mBuilderContext;
texture_machine_t mTextureMachine;
component_info_t mInfo[4];
int mBlending;
int mMasking;
int mAllMasked;
int mLogicOp;
int mAlphaTest;
int mAA;
int mDithering;
int mDepthTest;
int mSmooth;
int mFog;
pixel_t mDstPixel;
GGLFormat mCbFormat;
int mBlendFactorCached;
integer_t mAlphaSource;
int mBaseRegister;
int mBlendSrc;
int mBlendDst;
int mBlendSrcA;
int mBlendDstA;
int mOptLevel;
};
// ----------------------------------------------------------------------------
}; // namespace android
#endif // ANDROID_GGLASSEMBLER_H
@@ -0,0 +1,300 @@
/* $NetBSD: armreg.h,v 1.28 2003/10/31 16:30:15 scw Exp $ */
/*-
* Copyright (c) 1998, 2001 Ben Harris
* Copyright (c) 1994-1996 Mark Brinicombe.
* Copyright (c) 1994 Brini.
* All rights reserved.
*
* This code is derived from software written for Brini by Mark Brinicombe
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Brini.
* 4. The name of the company nor the name of the author may be used to
* endorse or promote products derived from this software without specific
* prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD: /repoman/r/ncvs/src/sys/arm/include/armreg.h,v 1.3 2005/11/21 19:06:25 cognet Exp $
*/
#ifndef MACHINE_ARMREG_H
#define MACHINE_ARMREG_H
#define INSN_SIZE 4
#define INSN_COND_MASK 0xf0000000 /* Condition mask */
#define PSR_MODE 0x0000001f /* mode mask */
#define PSR_USR26_MODE 0x00000000
#define PSR_FIQ26_MODE 0x00000001
#define PSR_IRQ26_MODE 0x00000002
#define PSR_SVC26_MODE 0x00000003
#define PSR_USR32_MODE 0x00000010
#define PSR_FIQ32_MODE 0x00000011
#define PSR_IRQ32_MODE 0x00000012
#define PSR_SVC32_MODE 0x00000013
#define PSR_ABT32_MODE 0x00000017
#define PSR_UND32_MODE 0x0000001b
#define PSR_SYS32_MODE 0x0000001f
#define PSR_32_MODE 0x00000010
#define PSR_FLAGS 0xf0000000 /* flags */
#define PSR_C_bit (1 << 29) /* carry */
/* The high-order byte is always the implementor */
#define CPU_ID_IMPLEMENTOR_MASK 0xff000000
#define CPU_ID_ARM_LTD 0x41000000 /* 'A' */
#define CPU_ID_DEC 0x44000000 /* 'D' */
#define CPU_ID_INTEL 0x69000000 /* 'i' */
#define CPU_ID_TI 0x54000000 /* 'T' */
/* How to decide what format the CPUID is in. */
#define CPU_ID_ISOLD(x) (((x) & 0x0000f000) == 0x00000000)
#define CPU_ID_IS7(x) (((x) & 0x0000f000) == 0x00007000)
#define CPU_ID_ISNEW(x) (!CPU_ID_ISOLD(x) && !CPU_ID_IS7(x))
/* On ARM3 and ARM6, this byte holds the foundry ID. */
#define CPU_ID_FOUNDRY_MASK 0x00ff0000
#define CPU_ID_FOUNDRY_VLSI 0x00560000
/* On ARM7 it holds the architecture and variant (sub-model) */
#define CPU_ID_7ARCH_MASK 0x00800000
#define CPU_ID_7ARCH_V3 0x00000000
#define CPU_ID_7ARCH_V4T 0x00800000
#define CPU_ID_7VARIANT_MASK 0x007f0000
/* On more recent ARMs, it does the same, but in a different format */
#define CPU_ID_ARCH_MASK 0x000f0000
#define CPU_ID_ARCH_V3 0x00000000
#define CPU_ID_ARCH_V4 0x00010000
#define CPU_ID_ARCH_V4T 0x00020000
#define CPU_ID_ARCH_V5 0x00030000
#define CPU_ID_ARCH_V5T 0x00040000
#define CPU_ID_ARCH_V5TE 0x00050000
#define CPU_ID_VARIANT_MASK 0x00f00000
/* Next three nybbles are part number */
#define CPU_ID_PARTNO_MASK 0x0000fff0
/* Intel XScale has sub fields in part number */
#define CPU_ID_XSCALE_COREGEN_MASK 0x0000e000 /* core generation */
#define CPU_ID_XSCALE_COREREV_MASK 0x00001c00 /* core revision */
#define CPU_ID_XSCALE_PRODUCT_MASK 0x000003f0 /* product number */
/* And finally, the revision number. */
#define CPU_ID_REVISION_MASK 0x0000000f
/* Individual CPUs are probably best IDed by everything but the revision. */
#define CPU_ID_CPU_MASK 0xfffffff0
/* Fake CPU IDs for ARMs without CP15 */
#define CPU_ID_ARM2 0x41560200
#define CPU_ID_ARM250 0x41560250
/* Pre-ARM7 CPUs -- [15:12] == 0 */
#define CPU_ID_ARM3 0x41560300
#define CPU_ID_ARM600 0x41560600
#define CPU_ID_ARM610 0x41560610
#define CPU_ID_ARM620 0x41560620
/* ARM7 CPUs -- [15:12] == 7 */
#define CPU_ID_ARM700 0x41007000 /* XXX This is a guess. */
#define CPU_ID_ARM710 0x41007100
#define CPU_ID_ARM7500 0x41027100 /* XXX This is a guess. */
#define CPU_ID_ARM710A 0x41047100 /* inc ARM7100 */
#define CPU_ID_ARM7500FE 0x41077100
#define CPU_ID_ARM710T 0x41807100
#define CPU_ID_ARM720T 0x41807200
#define CPU_ID_ARM740T8K 0x41807400 /* XXX no MMU, 8KB cache */
#define CPU_ID_ARM740T4K 0x41817400 /* XXX no MMU, 4KB cache */
/* Post-ARM7 CPUs */
#define CPU_ID_ARM810 0x41018100
#define CPU_ID_ARM920T 0x41129200
#define CPU_ID_ARM920T_ALT 0x41009200
#define CPU_ID_ARM922T 0x41029220
#define CPU_ID_ARM940T 0x41029400 /* XXX no MMU */
#define CPU_ID_ARM946ES 0x41049460 /* XXX no MMU */
#define CPU_ID_ARM966ES 0x41049660 /* XXX no MMU */
#define CPU_ID_ARM966ESR1 0x41059660 /* XXX no MMU */
#define CPU_ID_ARM1020E 0x4115a200 /* (AKA arm10 rev 1) */
#define CPU_ID_ARM1022ES 0x4105a220
#define CPU_ID_SA110 0x4401a100
#define CPU_ID_SA1100 0x4401a110
#define CPU_ID_TI925T 0x54029250
#define CPU_ID_SA1110 0x6901b110
#define CPU_ID_IXP1200 0x6901c120
#define CPU_ID_80200 0x69052000
#define CPU_ID_PXA250 0x69052100 /* sans core revision */
#define CPU_ID_PXA210 0x69052120
#define CPU_ID_PXA250A 0x69052100 /* 1st version Core */
#define CPU_ID_PXA210A 0x69052120 /* 1st version Core */
#define CPU_ID_PXA250B 0x69052900 /* 3rd version Core */
#define CPU_ID_PXA210B 0x69052920 /* 3rd version Core */
#define CPU_ID_PXA250C 0x69052d00 /* 4th version Core */
#define CPU_ID_PXA210C 0x69052d20 /* 4th version Core */
#define CPU_ID_80321_400 0x69052420
#define CPU_ID_80321_600 0x69052430
#define CPU_ID_80321_400_B0 0x69052c20
#define CPU_ID_80321_600_B0 0x69052c30
#define CPU_ID_IXP425_533 0x690541c0
#define CPU_ID_IXP425_400 0x690541d0
#define CPU_ID_IXP425_266 0x690541f0
/* ARM3-specific coprocessor 15 registers */
#define ARM3_CP15_FLUSH 1
#define ARM3_CP15_CONTROL 2
#define ARM3_CP15_CACHEABLE 3
#define ARM3_CP15_UPDATEABLE 4
#define ARM3_CP15_DISRUPTIVE 5
/* ARM3 Control register bits */
#define ARM3_CTL_CACHE_ON 0x00000001
#define ARM3_CTL_SHARED 0x00000002
#define ARM3_CTL_MONITOR 0x00000004
/*
* Post-ARM3 CP15 registers:
*
* 1 Control register
*
* 2 Translation Table Base
*
* 3 Domain Access Control
*
* 4 Reserved
*
* 5 Fault Status
*
* 6 Fault Address
*
* 7 Cache/write-buffer Control
*
* 8 TLB Control
*
* 9 Cache Lockdown
*
* 10 TLB Lockdown
*
* 11 Reserved
*
* 12 Reserved
*
* 13 Process ID (for FCSE)
*
* 14 Reserved
*
* 15 Implementation Dependent
*/
/* Some of the definitions below need cleaning up for V3/V4 architectures */
/* CPU control register (CP15 register 1) */
#define CPU_CONTROL_MMU_ENABLE 0x00000001 /* M: MMU/Protection unit enable */
#define CPU_CONTROL_AFLT_ENABLE 0x00000002 /* A: Alignment fault enable */
#define CPU_CONTROL_DC_ENABLE 0x00000004 /* C: IDC/DC enable */
#define CPU_CONTROL_WBUF_ENABLE 0x00000008 /* W: Write buffer enable */
#define CPU_CONTROL_32BP_ENABLE 0x00000010 /* P: 32-bit exception handlers */
#define CPU_CONTROL_32BD_ENABLE 0x00000020 /* D: 32-bit addressing */
#define CPU_CONTROL_LABT_ENABLE 0x00000040 /* L: Late abort enable */
#define CPU_CONTROL_BEND_ENABLE 0x00000080 /* B: Big-endian mode */
#define CPU_CONTROL_SYST_ENABLE 0x00000100 /* S: System protection bit */
#define CPU_CONTROL_ROM_ENABLE 0x00000200 /* R: ROM protection bit */
#define CPU_CONTROL_CPCLK 0x00000400 /* F: Implementation defined */
#define CPU_CONTROL_BPRD_ENABLE 0x00000800 /* Z: Branch prediction enable */
#define CPU_CONTROL_IC_ENABLE 0x00001000 /* I: IC enable */
#define CPU_CONTROL_VECRELOC 0x00002000 /* V: Vector relocation */
#define CPU_CONTROL_ROUNDROBIN 0x00004000 /* RR: Predictable replacement */
#define CPU_CONTROL_V4COMPAT 0x00008000 /* L4: ARMv4 compat LDR R15 etc */
#define CPU_CONTROL_IDC_ENABLE CPU_CONTROL_DC_ENABLE
/* XScale Auxiliary Control Register (CP15 register 1, opcode2 1) */
#define XSCALE_AUXCTL_K 0x00000001 /* dis. write buffer coalescing */
#define XSCALE_AUXCTL_P 0x00000002 /* ECC protect page table access */
#define XSCALE_AUXCTL_MD_WB_RA 0x00000000 /* mini-D$ wb, read-allocate */
#define XSCALE_AUXCTL_MD_WB_RWA 0x00000010 /* mini-D$ wb, read/write-allocate */
#define XSCALE_AUXCTL_MD_WT 0x00000020 /* mini-D$ wt, read-allocate */
#define XSCALE_AUXCTL_MD_MASK 0x00000030
/* Cache type register definitions */
#define CPU_CT_ISIZE(x) ((x) & 0xfff) /* I$ info */
#define CPU_CT_DSIZE(x) (((x) >> 12) & 0xfff) /* D$ info */
#define CPU_CT_S (1U << 24) /* split cache */
#define CPU_CT_CTYPE(x) (((x) >> 25) & 0xf) /* cache type */
#define CPU_CT_CTYPE_WT 0 /* write-through */
#define CPU_CT_CTYPE_WB1 1 /* write-back, clean w/ read */
#define CPU_CT_CTYPE_WB2 2 /* w/b, clean w/ cp15,7 */
#define CPU_CT_CTYPE_WB6 6 /* w/b, cp15,7, lockdown fmt A */
#define CPU_CT_CTYPE_WB7 7 /* w/b, cp15,7, lockdown fmt B */
#define CPU_CT_xSIZE_LEN(x) ((x) & 0x3) /* line size */
#define CPU_CT_xSIZE_M (1U << 2) /* multiplier */
#define CPU_CT_xSIZE_ASSOC(x) (((x) >> 3) & 0x7) /* associativity */
#define CPU_CT_xSIZE_SIZE(x) (((x) >> 6) & 0x7) /* size */
/* Fault status register definitions */
#define FAULT_TYPE_MASK 0x0f
#define FAULT_USER 0x10
#define FAULT_WRTBUF_0 0x00 /* Vector Exception */
#define FAULT_WRTBUF_1 0x02 /* Terminal Exception */
#define FAULT_BUSERR_0 0x04 /* External Abort on Linefetch -- Section */
#define FAULT_BUSERR_1 0x06 /* External Abort on Linefetch -- Page */
#define FAULT_BUSERR_2 0x08 /* External Abort on Non-linefetch -- Section */
#define FAULT_BUSERR_3 0x0a /* External Abort on Non-linefetch -- Page */
#define FAULT_BUSTRNL1 0x0c /* External abort on Translation -- Level 1 */
#define FAULT_BUSTRNL2 0x0e /* External abort on Translation -- Level 2 */
#define FAULT_ALIGN_0 0x01 /* Alignment */
#define FAULT_ALIGN_1 0x03 /* Alignment */
#define FAULT_TRANS_S 0x05 /* Translation -- Section */
#define FAULT_TRANS_P 0x07 /* Translation -- Page */
#define FAULT_DOMAIN_S 0x09 /* Domain -- Section */
#define FAULT_DOMAIN_P 0x0b /* Domain -- Page */
#define FAULT_PERM_S 0x0d /* Permission -- Section */
#define FAULT_PERM_P 0x0f /* Permission -- Page */
#define FAULT_IMPRECISE 0x400 /* Imprecise exception (XSCALE) */
/*
* Address of the vector page, low and high versions.
*/
#define ARM_VECTORS_LOW 0x00000000U
#define ARM_VECTORS_HIGH 0xffff0000U
/*
* ARM Instructions
*
* 3 3 2 2 2
* 1 0 9 8 7 0
* +-------+-------------------------------------------------------+
* | cond | instruction dependent |
* |c c c c| |
* +-------+-------------------------------------------------------+
*/
#define INSN_SIZE 4 /* Always 4 bytes */
#define INSN_COND_MASK 0xf0000000 /* Condition mask */
#define INSN_COND_AL 0xe0000000 /* Always condition */
#endif /* !MACHINE_ARMREG_H */
@@ -0,0 +1,672 @@
/* libs/pixelflinger/codeflinger/blending.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <cutils/log.h>
#include "codeflinger/GGLAssembler.h"
namespace android {
// Emits the fog stage for one component: blends the incoming fragment with
// the fog colour stored in the context, using the fog factor loaded from
// generated_vars.f.
//
// temp      - incoming fragment on entry, fogged result on exit.
// component - index of the component being processed.
// regs      - allocator used when 'temp' needs a new output register.
//
// Emits nothing when fog is not enabled for this component.
void GGLAssembler::build_fog(
        component_t& temp,      // incoming fragment / output
        int component,
        Scratch& regs)
{
    if (!mInfo[component].fog)
        return;

    Scratch localRegs(registerFile());
    comment("fog");

    // Describe the un-fogged input before 'temp' is possibly redirected to
    // a fresh output register.
    integer_t unfogged(temp.reg, temp.h, temp.flags);
    const bool canOverwriteInput = (temp.flags & CORRUPTIBLE) != 0;
    if (!canOverwriteInput) {
        temp.reg = regs.obtain();
        temp.flags |= CORRUPTIBLE;
    }

    // Fog colour of this component: a byte loaded from the context.
    integer_t colour(localRegs.obtain(), 8, CORRUPTIBLE);
    LDRB(AL, colour.reg, mBuilderContext.Rctx,
            immed12_pre(GGL_OFFSETOF(state.fog.color[component])));

    // Fog factor, described as a 16-bit quantity.
    integer_t fogFactor(localRegs.obtain(), 16, CORRUPTIBLE);
    CONTEXT_LOAD(fogFactor.reg, generated_vars.f);

    // Clamp the fog factor (TODO: see if there is a way to guarantee we
    // won't overflow when setting the iterators).  The BIC against the
    // sign mask (ASR #31) zeroes negative values; the CMP/MOV pair caps
    // the factor at 0x10000.
    const int kFactorMax = 0x10000;
    BIC(AL, 0, fogFactor.reg, fogFactor.reg, reg_imm(fogFactor.reg, ASR, 31));
    CMP(AL, fogFactor.reg, imm(kFactorMax));
    MOV(HS, 0, fogFactor.reg, imm(kFactorMax));

    build_blendFOneMinusF(temp, fogFactor, unfogged, colour);
}
// Emits the blending stage for one colour component.
//
// temp      - on entry the incoming fragment component; on exit it describes
//             the blended result and is flagged CORRUPTIBLE.
// pixel     - framebuffer pixel the destination component is extracted from.
// component - index of the component being processed; GGLFormat::ALPHA
//             selects the alpha blend functions (mBlendSrcA / mBlendDstA).
// regs      - allocator used whenever 'temp' needs a register of its own.
//
// Emits nothing when mInfo[component].blend is not set.
void GGLAssembler::build_blending(
component_t& temp, // incoming fragment / output
const pixel_t& pixel, // framebuffer
int component,
Scratch& regs)
{
if (!mInfo[component].blend)
return;
int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
// for the alpha component, SRC_ALPHA_SATURATE is handled as ONE
if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA)
fs = GGL_ONE;
// bit mask telling which of source/destination are needed, either as a
// blend operand (BLEND_*) or to build a factor (FACTOR_*)
const int blending = blending_codes(fs, fd);
if (!temp.size()) {
// here, blending will produce something which doesn't depend on
// that component (eg: GL_ZERO:GL_*), so the register has not been
// allocated yet. Will never be used as a source.
temp = component_t(regs.obtain(), CORRUPTIBLE);
}
// we are doing real blending...
// fb: extracted dst
// fragment: extracted src
// temp: component_t(fragment) and result
// scoped register allocator
Scratch scratches(registerFile());
comment("blending");
// we can optimize these cases a bit...
// (1) saturation is not needed
// (2) we can use only one multiply instead of 2
// (3) we can reduce the register pressure
// R = S*f + D*(1-f) = (S-D)*f + D
// R = S*(1-f) + D*f = (D-S)*f + S
const bool same_factor_opt1 =
(fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) ||
(fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) ||
(fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) ||
(fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA);
const bool same_factor_opt2 =
(fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) ||
(fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) ||
(fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) ||
(fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA);
// XXX: we could also optimize these cases:
// R = S*f + D*f = (S+D)*f
// R = S*(1-f) + D*(1-f) = (S+D)*(1-f)
// R = S*D + D*S = 2*S*D
// see if we need to extract 'component' from the destination (fb)
integer_t fb;
if (blending & (BLEND_DST|FACTOR_DST)) {
fb.setTo(scratches.obtain(), 32);
extract(fb, pixel, component);
if (mDithering) {
// XXX: maybe what we should do instead, is simply
// expand fb -or- fragment to the larger of the two
if (fb.size() < temp.size()) {
// for now we expand 'fb' to min(fragment, 8)
int new_size = temp.size() < 8 ? temp.size() : 8;
expand(fb, fb, new_size);
}
}
}
// convert input fragment to integer_t
// (when temp may be clobbered, shift it down in place so its low bit is 0)
if (temp.l && (temp.flags & CORRUPTIBLE)) {
MOV(AL, 0, temp.reg, reg_imm(temp.reg, LSR, temp.l));
temp.h -= temp.l;
temp.l = 0;
}
integer_t fragment(temp.reg, temp.size(), temp.flags);
// if not done yet, convert input fragment to integer_t
if (temp.l) {
// here we know temp is not CORRUPTIBLE
fragment.reg = scratches.obtain();
MOV(AL, 0, fragment.reg, reg_imm(temp.reg, LSR, temp.l));
fragment.flags |= CORRUPTIBLE;
}
if (!(temp.flags & CORRUPTIBLE)) {
// temp is not corruptible, but since it's the destination it
// will be modified, so we need to allocate a new register.
temp.reg = regs.obtain();
// NOTE(review): CORRUPTIBLE is already clear in temp.flags on this
// path (see the enclosing test), so the next statement has no effect.
temp.flags &= ~CORRUPTIBLE;
fragment.flags &= ~CORRUPTIBLE;
}
if ((blending & BLEND_SRC) && !same_factor_opt1) {
// source (fragment) is needed for the blending stage
// so it's not CORRUPTIBLE (unless we're doing same_factor_opt1)
fragment.flags &= ~CORRUPTIBLE;
}
if (same_factor_opt1) {
// R = S*f + D*(1-f) = (S-D)*f + D
integer_t factor;
build_blend_factor(factor, fs,
component, pixel, fragment, fb, scratches);
// fb is always corruptible from this point
fb.flags |= CORRUPTIBLE;
build_blendFOneMinusF(temp, factor, fragment, fb);
} else if (same_factor_opt2) {
// R = S*(1-f) + D*f = (D-S)*f + S
integer_t factor;
// fb is always corruptible here
fb.flags |= CORRUPTIBLE;
build_blend_factor(factor, fd,
component, pixel, fragment, fb, scratches);
build_blendOneMinusFF(temp, factor, fragment, fb);
} else {
integer_t src_factor;
integer_t dst_factor;
// if destination (fb) is not needed for the blending stage,
// then it can be marked as CORRUPTIBLE
if (!(blending & BLEND_DST)) {
fb.flags |= CORRUPTIBLE;
}
// XXX: try to mark some registers as CORRUPTIBLE
// in most case we could make those corruptible
// when we're processing the last component
// but not always, for instance
// when fragment is constant and not reloaded
// when fb is needed for logic-ops or masking
// when a register is aliased (for instance with mAlphaSource)
// blend away...
if (fs==GGL_ZERO) {
if (fd==GGL_ZERO) { // R = 0
// already taken care of
} else if (fd==GGL_ONE) { // R = D
// already taken care of
} else { // R = D*fd
// compute fd
build_blend_factor(dst_factor, fd,
component, pixel, fragment, fb, scratches);
mul_factor(temp, fb, dst_factor);
}
} else if (fs==GGL_ONE) {
if (fd==GGL_ZERO) { // R = S
// NOP, taken care of
} else if (fd==GGL_ONE) { // R = S + D
component_add(temp, fb, fragment); // args order matters
component_sat(temp);
} else { // R = S + D*fd
// compute fd
build_blend_factor(dst_factor, fd,
component, pixel, fragment, fb, scratches);
mul_factor_add(temp, fb, dst_factor, component_t(fragment));
component_sat(temp);
}
} else {
// compute fs
build_blend_factor(src_factor, fs,
component, pixel, fragment, fb, scratches);
if (fd==GGL_ZERO) { // R = S*fs
mul_factor(temp, fragment, src_factor);
} else if (fd==GGL_ONE) { // R = S*fs + D
mul_factor_add(temp, fragment, src_factor, component_t(fb));
component_sat(temp);
} else { // R = S*fs + D*fd
mul_factor(temp, fragment, src_factor);
// the source factor is no longer needed; give its register back
// if it came from the local allocator
if (scratches.isUsed(src_factor.reg))
scratches.recycle(src_factor.reg);
// compute fd
build_blend_factor(dst_factor, fd,
component, pixel, fragment, fb, scratches);
mul_factor_add(temp, fb, dst_factor, temp);
// NOTE(review): this is the final else of the same_factor_opt1 /
// same_factor_opt2 chain, so both flags are false here and the
// test below always passes.
if (!same_factor_opt1 && !same_factor_opt2) {
component_sat(temp);
}
}
}
}
// now we can be corrupted (it's the dest)
temp.flags |= CORRUPTIBLE;
}
// Emits the code computing blend factor 'f' and describes the result in
// 'factor'.
//
// factor    - output: register/size/flags of the computed factor.
// f         - blend function (one of the GGL_* factor constants).
// component - component being blended.
// dst_pixel - framebuffer pixel, used to extract destination alpha.
// fragment  - source component; its register may be taken over for the
//             factor when it is CORRUPTIBLE (the flag is then cleared).
// fb        - destination component; same take-over rule as 'fragment'.
// scratches - allocator used when neither register can be reused.
//
// For the source-alpha factors the result is cached through
// mBlendFactorCached / mAlphaSource.  The factor is reduced to at most
// 8 bits before returning from the common path.
void GGLAssembler::build_blend_factor(
integer_t& factor, int f, int component,
const pixel_t& dst_pixel,
integer_t& fragment,
integer_t& fb,
Scratch& scratches)
{
integer_t src_alpha(fragment);
// src_factor/dst_factor won't be used after blending,
// so it's fine to mark them as CORRUPTIBLE (if not aliased)
factor.flags |= CORRUPTIBLE;
// first switch: choose the register that will hold the factor
switch(f) {
case GGL_ONE_MINUS_SRC_ALPHA:
case GGL_SRC_ALPHA:
if (component==GGLFormat::ALPHA && !isAlphaSourceNeeded()) {
// we're processing alpha, so we already have
// src-alpha in fragment, and we need src-alpha just this time.
} else {
// alpha-src will be needed for other components
if (!mBlendFactorCached || mBlendFactorCached==f) {
src_alpha = mAlphaSource;
factor = mAlphaSource;
factor.flags &= ~CORRUPTIBLE;
// we already computed the blend factor before, nothing to do.
if (mBlendFactorCached)
return;
// this is the first time, make sure to compute the blend
// factor properly.
mBlendFactorCached = f;
break;
} else {
// we have a cached alpha blend factor, but we want another one,
// this should really not happen because by construction,
// we cannot have BOTH source and destination
// blend factors use ALPHA *and* ONE_MINUS_ALPHA (because
// the blending stage uses the f/(1-f) optimization
// for completeness, we handle this case though. Since there
// are only 2 choices, this means we want "the other one"
// (1-factor)
factor = mAlphaSource;
factor.flags &= ~CORRUPTIBLE;
RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
mBlendFactorCached = f;
return;
}
}
// fall-through...
case GGL_ONE_MINUS_DST_COLOR:
case GGL_DST_COLOR:
case GGL_ONE_MINUS_SRC_COLOR:
case GGL_SRC_COLOR:
case GGL_ONE_MINUS_DST_ALPHA:
case GGL_DST_ALPHA:
case GGL_SRC_ALPHA_SATURATE:
// help us find out what register we can use for the blend-factor
// CORRUPTIBLE registers are chosen first, or a new one is allocated.
if (fragment.flags & CORRUPTIBLE) {
factor.setTo(fragment.reg, 32, CORRUPTIBLE);
fragment.flags &= ~CORRUPTIBLE;
} else if (fb.flags & CORRUPTIBLE) {
factor.setTo(fb.reg, 32, CORRUPTIBLE);
fb.flags &= ~CORRUPTIBLE;
} else {
factor.setTo(scratches.obtain(), 32, CORRUPTIBLE);
}
break;
}
// second switch: compute the factor as x + (x >> (size-1)), i.e. the
// value plus its own most significant bit
// XXX: doesn't work if size==1
switch(f) {
case GGL_ONE_MINUS_DST_COLOR:
case GGL_DST_COLOR:
factor.s = fb.s;
ADD(AL, 0, factor.reg, fb.reg, reg_imm(fb.reg, LSR, fb.s-1));
break;
case GGL_ONE_MINUS_SRC_COLOR:
case GGL_SRC_COLOR:
factor.s = fragment.s;
ADD(AL, 0, factor.reg, fragment.reg,
reg_imm(fragment.reg, LSR, fragment.s-1));
break;
case GGL_ONE_MINUS_SRC_ALPHA:
case GGL_SRC_ALPHA:
factor.s = src_alpha.s;
ADD(AL, 0, factor.reg, src_alpha.reg,
reg_imm(src_alpha.reg, LSR, src_alpha.s-1));
break;
case GGL_ONE_MINUS_DST_ALPHA:
case GGL_DST_ALPHA:
// XXX: should be precomputed
extract(factor, dst_pixel, GGLFormat::ALPHA);
ADD(AL, 0, factor.reg, factor.reg,
reg_imm(factor.reg, LSR, factor.s-1));
break;
case GGL_SRC_ALPHA_SATURATE:
// XXX: should be precomputed
// XXX: f = min(As, 1-Ad)
// btw, we're guaranteed that Ad's size is <= 8, because
// it's extracted from the framebuffer
// NOTE(review): no instruction is emitted for this case, so the
// register chosen above is left with whatever it held.
break;
}
// third switch: the ONE_MINUS_* variants are (1<<size) - factor
switch(f) {
case GGL_ONE_MINUS_DST_COLOR:
case GGL_ONE_MINUS_SRC_COLOR:
case GGL_ONE_MINUS_DST_ALPHA:
case GGL_ONE_MINUS_SRC_ALPHA:
RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
}
// don't need more than 8-bits for the blend factor
// and this will prevent overflows in the multiplies later
if (factor.s > 8) {
MOV(AL, 0, factor.reg, reg_imm(factor.reg, LSR, factor.s-8));
factor.s = 8;
}
}
// Maps a (source, destination) blend-function pair to a bit mask made of
// BLEND_SRC / BLEND_DST (the operand takes part in the blend) and
// FACTOR_SRC / FACTOR_DST (the operand is needed to build a factor).
// Functions not listed below contribute no bits.
int GGLAssembler::blending_codes(int fs, int fd)
{
    // Contribution of the source blend function.
    int srcBits = 0;
    switch (fs) {
    case GGL_ONE:
    // For the destination-alpha factors there is no need to extract
    // 'component' from the destination, because only ALPHA is needed.
    case GGL_ONE_MINUS_DST_ALPHA:
    case GGL_DST_ALPHA:
        srcBits = BLEND_SRC;
        break;
    case GGL_ONE_MINUS_DST_COLOR:
    case GGL_DST_COLOR:
        srcBits = FACTOR_DST | BLEND_SRC;
        break;
    case GGL_ONE_MINUS_SRC_COLOR:
    case GGL_SRC_COLOR:
    case GGL_ONE_MINUS_SRC_ALPHA:
    case GGL_SRC_ALPHA:
    case GGL_SRC_ALPHA_SATURATE:
        srcBits = FACTOR_SRC | BLEND_SRC;
        break;
    default:
        break;
    }

    // Contribution of the destination blend function.
    int dstBits = 0;
    switch (fd) {
    case GGL_ONE:
    // For the source-alpha factors there is no need to extract
    // 'component' from the source, because only ALPHA is needed.
    case GGL_ONE_MINUS_SRC_ALPHA:
    case GGL_SRC_ALPHA:
        dstBits = BLEND_DST;
        break;
    case GGL_ONE_MINUS_DST_COLOR:
    case GGL_DST_COLOR:
    case GGL_ONE_MINUS_DST_ALPHA:
    case GGL_DST_ALPHA:
        dstBits = FACTOR_DST | BLEND_DST;
        break;
    case GGL_ONE_MINUS_SRC_COLOR:
    case GGL_SRC_COLOR:
        dstBits = FACTOR_SRC | BLEND_DST;
        break;
    default:
        break;
    }

    return srcBits | dstBits;
}
// ---------------------------------------------------------------------------
// Emits R = S*f + D*(1-f), computed with a single multiply as (S-D)*f + D.
//
// temp     - receives the result.
// factor   - blend factor f.
// fragment - source S; its register is reused for S-D when CORRUPTIBLE.
// fb       - destination D.
void GGLAssembler::build_blendFOneMinusF(
        component_t& temp,
        const integer_t& factor,
        const integer_t& fragment,
        const integer_t& fb)
{
    Scratch localRegs(registerFile());

    // Pick the register that will hold S-D.
    int diffReg;
    if (fragment.flags & CORRUPTIBLE) {
        diffReg = fragment.reg;
    } else {
        diffReg = localRegs.obtain();
    }
    integer_t diff(diffReg, fb.size(), CORRUPTIBLE);

    // Bring S to D's size with a shift folded into the subtraction.
    const int extraBits = fragment.size() - fb.size();
    uint32_t alignedSrc;
    if (extraBits > 0) {
        alignedSrc = reg_imm(fragment.reg, LSR, extraBits);
    } else if (extraBits < 0) {
        alignedSrc = reg_imm(fragment.reg, LSL, -extraBits);
    } else {
        alignedSrc = fragment.reg;
    }

    // RSB is a reverse subtract: diff = alignedSrc - fb = S - D
    RSB(AL, 0, diff.reg, fb.reg, alignedSrc);

    mul_factor_add(temp, diff, factor, component_t(fb));
}
// Emits R = S*(1-f) + D*f, computed with a single multiply as (D-S)*f + S.
//
// temp     - receives the result.
// factor   - blend factor f.
// fragment - source S.
// fb       - destination D; its register is reused for D-S when CORRUPTIBLE.
void GGLAssembler::build_blendOneMinusFF(
        component_t& temp,
        const integer_t& factor,
        const integer_t& fragment,
        const integer_t& fb)
{
    Scratch localRegs(registerFile());

    // Pick the register that will hold D-S.
    int diffReg;
    if (fb.flags & CORRUPTIBLE) {
        diffReg = fb.reg;
    } else {
        diffReg = localRegs.obtain();
    }
    integer_t diff(diffReg, fb.size(), CORRUPTIBLE);

    // Bring S to D's size with a shift folded into the subtraction.
    const int extraBits = fragment.size() - fb.size();
    uint32_t alignedSrc;
    if (extraBits > 0) {
        alignedSrc = reg_imm(fragment.reg, LSR, extraBits);
    } else if (extraBits < 0) {
        alignedSrc = reg_imm(fragment.reg, LSL, -extraBits);
    } else {
        alignedSrc = fragment.reg;
    }

    // diff = fb - alignedSrc = D - S
    SUB(AL, 0, diff.reg, fb.reg, alignedSrc);

    mul_factor_add(temp, diff, factor, component_t(fragment));
}
// ---------------------------------------------------------------------------
// Emits d = v * f using the 16-bit multiply instructions.
//
// d - destination; on return d.h is the size of the product in bits and
//     d.l is 0 when dithering, otherwise the size of f (with CLEAR_LO set).
// v - value to scale.
// f - blend factor.
//
// An operand in the 16..23 bit range is first shifted right down to 15 bits.
// An operand in the 24..31 bit range is used through its top halfword.
// Throughout, the x half-selector applies to v and the y selector to f.
void GGLAssembler::mul_factor(  component_t& d,
                                const integer_t& v,
                                const integer_t& f)
{
    int vs = v.size();
    int fs = f.size();
    int ms = vs+fs;

    // XXX: we could have special cases for 1 bit mul

    // all this code below to use the best multiply instruction
    // wrt the parameters size. We take advantage of the fact
    // that the 16-bits multiplies allow a 16-bit shift
    // The trick is that we just make sure that we have at least 8-bits
    // per component (which is enough for a 8 bits display).

    int xy;
    int vshift = 0;
    int fshift = 0;
    int smulw = 0;

    if (vs<16) {
        if (fs<16) {
            xy = xyBB;
        } else if (GGL_BETWEEN(fs, 24, 31)) {
            ms -= 16;
            // v sits in the bottom halfword and f in the top one.  This
            // used to select xyTB, i.e. the (empty) top half of v, unlike
            // every other case in this function.
            xy = xyBT;
        } else {
            // eg: 15 * 18  ->  15 * 15
            fshift = fs - 15;
            ms -= fshift;
            xy = xyBB;
        }
    } else if (GGL_BETWEEN(vs, 24, 31)) {
        if (fs<16) {
            ms -= 16;
            xy = xyTB;
        } else if (GGL_BETWEEN(fs, 24, 31)) {
            ms -= 32;
            xy = xyTT;
        } else {
            // eg: 24 * 18  ->  8 * 18
            fshift = fs - 15;
            ms -= 16 + fshift;
            xy = xyTB;
        }
    } else {
        if (fs<16) {
            // eg: 18 * 15  ->  15 * 15
            vshift = vs - 15;
            ms -= vshift;
            xy = xyBB;
        } else if (GGL_BETWEEN(fs, 24, 31)) {
            // eg: 18 * 24  ->  15 * 8
            vshift = vs - 15;
            ms -= 16 + vshift;
            xy = xyBT;
        } else {
            // eg: 18 * 18  ->  (15 * 18)>>16
            fshift = fs - 15;
            ms -= 16 + fshift;
            xy = yB;    //XXX SMULWB
            smulw = 1;
        }
    }

    LOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs);

    Scratch scratches(registerFile());
    int vreg = v.reg;
    int freg = f.reg;
    if (vshift) {
        MOV(AL, 0, d.reg, reg_imm(vreg, LSR, vshift));
        vreg = d.reg;
    }
    if (fshift) {
        // Shift the *factor* (this used to shift v a second time, which
        // turned the product into v * (v >> fshift)).  At most one of
        // vshift/fshift is set, so vreg is still v.reg here; when d aliases
        // v the shifted factor must go to another register or it would
        // destroy v before the multiply.
        const int shifted = (d.reg == vreg) ? scratches.obtain() : d.reg;
        MOV(AL, 0, shifted, reg_imm(freg, LSR, fshift));
        freg = shifted;
    }
    if (smulw)  SMULW(AL, xy, d.reg, vreg, freg);
    else        SMUL(AL, xy, d.reg, vreg, freg);

    d.h = ms;
    if (mDithering) {
        d.l = 0;
    } else {
        d.l = fs;
        d.flags |= CLEAR_LO;
    }
}
// Emits d = v * f + a.
//
// d - destination; on return d.h is vs+fs and d.l is a.l when dithering,
//     otherwise the larger of fs and a.l (with CLEAR_LO set).
// v - value to scale.
// f - blend factor.
// a - addend; it is aligned to the product (ms = vs+fs bits) either by
//     expanding it first (dithering) or by shifting it inside the final ADD.
void GGLAssembler::mul_factor_add( component_t& d,
const integer_t& v,
const integer_t& f,
const component_t& a)
{
// XXX: we could have special cases for 1 bit mul
Scratch scratches(registerFile());
int vs = v.size();
int fs = f.size();
int as = a.h;
int ms = vs+fs;
LOGE_IF(ms>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs, fs, as);
integer_t add(a.reg, a.h, a.flags);
// 'a' is a component_t but it is guaranteed to have
// its high bits set to 0. However in the dithering case,
// we can't get away with truncating the potentially bad bits
// so extraction is needed.
if ((mDithering) && (a.size() < ms)) {
// we need to expand a
if (!(a.flags & CORRUPTIBLE)) {
// ... but it's not corruptible, so we need to pick a
// temporary register.
// Try to use the destination register first (it's likely
// to be usable, unless it aliases an input).
if (d.reg!=a.reg && d.reg!=v.reg && d.reg!=f.reg) {
add.reg = d.reg;
} else {
add.reg = scratches.obtain();
}
}
expand(add, a, ms); // extracts and expands
as = ms;
}
if (ms == as) {
// addend and product have the same size: one multiply-accumulate
if (vs<16 && fs<16) SMLABB(AL, d.reg, v.reg, f.reg, add.reg);
else MLA(AL, 0, d.reg, v.reg, f.reg, add.reg);
} else {
// sizes differ: multiply first, then add the addend shifted into place
int temp = d.reg;
if (temp == add.reg) {
// the mul will modify add.reg, we need an intermediary reg
if (v.flags & CORRUPTIBLE) temp = v.reg;
else if (f.flags & CORRUPTIBLE) temp = f.reg;
else temp = scratches.obtain();
}
if (vs<16 && fs<16) SMULBB(AL, temp, v.reg, f.reg);
else MUL(AL, 0, temp, v.reg, f.reg);
if (ms>as) {
ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSL, ms-as));
} else if (ms<as) {
// not sure if we should expand the mul instead?
ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSR, as-ms));
}
}
d.h = ms;
if (mDithering) {
d.l = a.l;
} else {
d.l = fs>a.l ? fs : a.l;
d.flags |= CLEAR_LO;
}
}
// Emits d = src + dst, with dst shifted left so that it lines up with src.
//
// d   - destination; on return d.h is src's size and d.l is 0 when
//       dithering, otherwise the alignment shift (with CLEAR_LO set).
// dst - framebuffer component.
// src - fragment component.
void GGLAssembler::component_add(component_t& d,
        const integer_t& dst, const integer_t& src)
{
    // here we're guaranteed that fragment.size() >= fb.size()
    const int widen = src.size() - dst.size();

    // Second operand of the ADD: dst as is, or shifted up to src's size.
    uint32_t addend = dst.reg;
    if (widen) {
        addend = reg_imm(dst.reg, LSL, widen);
    }
    ADD(AL, 0, d.reg, src.reg, addend);

    d.h = src.size();
    if (mDithering) {
        d.l = 0;
        return;
    }
    d.l = widen;
    d.flags |= CLEAR_LO;
}
// Emits a saturation of component 'v': when the register is >= (1 << v.h)
// (unsigned compare), it is replaced by the all-ones pattern of the
// component, i.e. v.size() one bits starting at bit v.l.
void GGLAssembler::component_sat(const component_t& v)
{
    const int saturated = ((1 << v.size()) - 1) << v.l;
    const int overflowBit = 1 << v.h;

    CMP(AL, v.reg, imm(overflowBit));

    // Load the saturated value with the cheapest encoding available; every
    // instruction below is conditional on HS.
    if (isValidImmediate(saturated)) {
        MOV(HS, 0, v.reg, imm(saturated));
        return;
    }
    if (isValidImmediate(~saturated)) {
        MVN(HS, 0, v.reg, imm(~saturated));
        return;
    }
    // Neither form can be encoded: build it as (1 << h) - (1 << l).
    MOV(HS, 0, v.reg, imm(overflowBit));
    SUB(HS, 0, v.reg, v.reg, imm(1 << v.l));
}
// ----------------------------------------------------------------------------
}; // namespace android
@@ -0,0 +1,708 @@
/* $NetBSD: disassem.c,v 1.14 2003/03/27 16:58:36 mycroft Exp $ */
/*-
* Copyright (c) 1996 Mark Brinicombe.
* Copyright (c) 1996 Brini.
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Brini.
* 4. The name of the company nor the name of the author may be used to
* endorse or promote products derived from this software without specific
* prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* RiscBSD kernel project
*
* db_disasm.c
*
* Kernel disassembler
*
* Created : 10/02/96
*
* Structured after the sparc/sparc/db_disasm.c by David S. Miller &
* Paul Kranenburg
*
* This code is not complete. Not all instructions are disassembled.
*/
#include <sys/cdefs.h>
//__FBSDID("$FreeBSD: /repoman/r/ncvs/src/sys/arm/arm/disassem.c,v 1.2 2005/01/05 21:58:47 imp Exp $");
#include <sys/param.h>
#include <stdio.h>
#include "disassem.h"
#include "armreg.h"
//#include <ddb/ddb.h>
/*
* General instruction format
*
* insn[cc][mod] [operands]
*
* Those fields with an uppercase format code indicate that the field
* follows directly after the instruction before the separator i.e.
* they modify the instruction rather than just being an operand to
* the instruction. The only exception is the writeback flag which
* follows an operand.
*
*
* 2 - print Operand 2 of a data processing instruction
* d - destination register (bits 12-15)
* n - n register (bits 16-19)
* s - s register (bits 8-11)
* o - indirect register rn (bits 16-19) (used by swap)
* m - m register (bits 0-3)
* a - address operand of ldr/str instruction
* e - address operand of ldrh/strh instruction
* l - register list for ldm/stm instruction
* f - 1st fp operand (register) (bits 12-14)
* g - 2nd fp operand (register) (bits 16-18)
* h - 3rd fp operand (register/immediate) (bits 0-4)
* j - xtb rotate literal (bits 10-11)
* b - branch address
* t - thumb branch address (bits 24, 0-23)
* k - breakpoint comment (bits 0-3, 8-19)
* X - block transfer type
* Y - block transfer type (r13 base)
* c - comment field bits(0-23)
* p - saved or current status register
* F - PSR transfer fields
* D - destination-is-r15 (P) flag on TST, TEQ, CMP, CMN
* L - co-processor transfer size
* S - set status flag
* P - fp precision
* Q - fp precision (for ldf/stf)
* R - fp rounding
* v - co-processor data transfer registers + addressing mode
* W - writeback flag
* x - instruction in hex
* # - co-processor number
* y - co-processor data processing registers
* z - co-processor register transfer registers
*/
/*
 * One row of the instruction decode table.  An instruction word matches
 * the row when (insn & mask) == pattern.
 */
struct arm32_insn {
u_int mask; /* bits of the instruction word that are compared */
u_int pattern; /* value those bits must have */
char* name; /* mnemonic; NULL in the terminating row */
char* format; /* operand format codes (see the list above) */
};
/*
 * Instruction decode table, scanned from the top by disasm() until a row
 * satisfies (insn & mask) == pattern.  Rows whose pattern is a special case
 * of a later, more general row are therefore listed first (see the
 * "Before ..." notes).  The table ends with a row whose name is NULL.
 *
 * The saturating add/subtract rows used to carry misspelt mnemonics
 * ("qaad", "qdaad", "dsub"); they are QADD, QDADD and QSUB.
 */
static const struct arm32_insn arm32_i[] = {
    { 0x0fffffff, 0x0ff00000, "imb", "c" }, /* Before swi */
    { 0x0fffffff, 0x0ff00001, "imbrange", "c" }, /* Before swi */
    { 0x0f000000, 0x0f000000, "swi", "c" },
    { 0xfe000000, 0xfa000000, "blx", "t" }, /* Before b and bl */
    { 0x0f000000, 0x0a000000, "b", "b" },
    { 0x0f000000, 0x0b000000, "bl", "b" },
    { 0x0fe000f0, 0x00000090, "mul", "Snms" },
    { 0x0fe000f0, 0x00200090, "mla", "Snmsd" },
    { 0x0fe000f0, 0x00800090, "umull", "Sdnms" },
    { 0x0fe000f0, 0x00c00090, "smull", "Sdnms" },
    { 0x0fe000f0, 0x00a00090, "umlal", "Sdnms" },
    { 0x0fe000f0, 0x00e00090, "smlal", "Sdnms" },
    { 0x0fff03f0, 0x06cf0070, "uxtb16", "dmj" },
    { 0x0d700000, 0x04200000, "strt", "daW" },
    { 0x0d700000, 0x04300000, "ldrt", "daW" },
    { 0x0d700000, 0x04600000, "strbt", "daW" },
    { 0x0d700000, 0x04700000, "ldrbt", "daW" },
    { 0x0c500000, 0x04000000, "str", "daW" },
    { 0x0c500000, 0x04100000, "ldr", "daW" },
    { 0x0c500000, 0x04400000, "strb", "daW" },
    { 0x0c500000, 0x04500000, "ldrb", "daW" },
    { 0x0e1f0000, 0x080d0000, "stm", "YnWl" },/* separate out r13 base */
    { 0x0e1f0000, 0x081d0000, "ldm", "YnWl" },/* separate out r13 base */
    { 0x0e100000, 0x08000000, "stm", "XnWl" },
    { 0x0e100000, 0x08100000, "ldm", "XnWl" },
    { 0x0e1000f0, 0x00100090, "ldrb", "deW" },
    { 0x0e1000f0, 0x00000090, "strb", "deW" },
    { 0x0e1000f0, 0x001000d0, "ldrsb", "deW" },
    { 0x0e1000f0, 0x001000b0, "ldrh", "deW" },
    { 0x0e1000f0, 0x000000b0, "strh", "deW" },
    { 0x0e1000f0, 0x001000f0, "ldrsh", "deW" },
    { 0x0f200090, 0x00200090, "und", "x" }, /* Before data processing */
    { 0x0e1000d0, 0x000000d0, "und", "x" }, /* Before data processing */
    { 0x0ff00ff0, 0x01000090, "swp", "dmo" },
    { 0x0ff00ff0, 0x01400090, "swpb", "dmo" },
    { 0x0fbf0fff, 0x010f0000, "mrs", "dp" }, /* Before data processing */
    { 0x0fb0fff0, 0x0120f000, "msr", "pFm" },/* Before data processing */
    { 0x0fb0f000, 0x0320f000, "msr", "pF2" },/* Before data processing */
    { 0x0ffffff0, 0x012fff10, "bx", "m" },
    { 0x0fff0ff0, 0x016f0f10, "clz", "dm" },
    { 0x0ffffff0, 0x012fff30, "blx", "m" },
    { 0xfff000f0, 0xe1200070, "bkpt", "k" },
    { 0x0de00000, 0x00000000, "and", "Sdn2" },
    { 0x0de00000, 0x00200000, "eor", "Sdn2" },
    { 0x0de00000, 0x00400000, "sub", "Sdn2" },
    { 0x0de00000, 0x00600000, "rsb", "Sdn2" },
    { 0x0de00000, 0x00800000, "add", "Sdn2" },
    { 0x0de00000, 0x00a00000, "adc", "Sdn2" },
    { 0x0de00000, 0x00c00000, "sbc", "Sdn2" },
    { 0x0de00000, 0x00e00000, "rsc", "Sdn2" },
    { 0x0df00000, 0x01100000, "tst", "Dn2" },
    { 0x0df00000, 0x01300000, "teq", "Dn2" },
    { 0x0df00000, 0x01500000, "cmp", "Dn2" },
    { 0x0df00000, 0x01700000, "cmn", "Dn2" },
    { 0x0de00000, 0x01800000, "orr", "Sdn2" },
    { 0x0de00000, 0x01a00000, "mov", "Sd2" },
    { 0x0de00000, 0x01c00000, "bic", "Sdn2" },
    { 0x0de00000, 0x01e00000, "mvn", "Sd2" },
    { 0x0ff08f10, 0x0e000100, "adf", "PRfgh" },
    { 0x0ff08f10, 0x0e100100, "muf", "PRfgh" },
    { 0x0ff08f10, 0x0e200100, "suf", "PRfgh" },
    { 0x0ff08f10, 0x0e300100, "rsf", "PRfgh" },
    { 0x0ff08f10, 0x0e400100, "dvf", "PRfgh" },
    { 0x0ff08f10, 0x0e500100, "rdf", "PRfgh" },
    { 0x0ff08f10, 0x0e600100, "pow", "PRfgh" },
    { 0x0ff08f10, 0x0e700100, "rpw", "PRfgh" },
    { 0x0ff08f10, 0x0e800100, "rmf", "PRfgh" },
    { 0x0ff08f10, 0x0e900100, "fml", "PRfgh" },
    { 0x0ff08f10, 0x0ea00100, "fdv", "PRfgh" },
    { 0x0ff08f10, 0x0eb00100, "frd", "PRfgh" },
    { 0x0ff08f10, 0x0ec00100, "pol", "PRfgh" },
    { 0x0f008f10, 0x0e000100, "fpbop", "PRfgh" },
    { 0x0ff08f10, 0x0e008100, "mvf", "PRfh" },
    { 0x0ff08f10, 0x0e108100, "mnf", "PRfh" },
    { 0x0ff08f10, 0x0e208100, "abs", "PRfh" },
    { 0x0ff08f10, 0x0e308100, "rnd", "PRfh" },
    { 0x0ff08f10, 0x0e408100, "sqt", "PRfh" },
    { 0x0ff08f10, 0x0e508100, "log", "PRfh" },
    { 0x0ff08f10, 0x0e608100, "lgn", "PRfh" },
    { 0x0ff08f10, 0x0e708100, "exp", "PRfh" },
    { 0x0ff08f10, 0x0e808100, "sin", "PRfh" },
    { 0x0ff08f10, 0x0e908100, "cos", "PRfh" },
    { 0x0ff08f10, 0x0ea08100, "tan", "PRfh" },
    { 0x0ff08f10, 0x0eb08100, "asn", "PRfh" },
    { 0x0ff08f10, 0x0ec08100, "acs", "PRfh" },
    { 0x0ff08f10, 0x0ed08100, "atn", "PRfh" },
    { 0x0f008f10, 0x0e008100, "fpuop", "PRfh" },
    { 0x0e100f00, 0x0c000100, "stf", "QLv" },
    { 0x0e100f00, 0x0c100100, "ldf", "QLv" },
    { 0x0ff00f10, 0x0e000110, "flt", "PRgd" },
    { 0x0ff00f10, 0x0e100110, "fix", "PRdh" },
    { 0x0ff00f10, 0x0e200110, "wfs", "d" },
    { 0x0ff00f10, 0x0e300110, "rfs", "d" },
    { 0x0ff00f10, 0x0e400110, "wfc", "d" },
    { 0x0ff00f10, 0x0e500110, "rfc", "d" },
    { 0x0ff0ff10, 0x0e90f110, "cmf", "PRgh" },
    { 0x0ff0ff10, 0x0eb0f110, "cnf", "PRgh" },
    { 0x0ff0ff10, 0x0ed0f110, "cmfe", "PRgh" },
    { 0x0ff0ff10, 0x0ef0f110, "cnfe", "PRgh" },
    { 0xff100010, 0xfe000010, "mcr2", "#z" },
    { 0x0f100010, 0x0e000010, "mcr", "#z" },
    { 0xff100010, 0xfe100010, "mrc2", "#z" },
    { 0x0f100010, 0x0e100010, "mrc", "#z" },
    { 0xff000010, 0xfe000000, "cdp2", "#y" },
    { 0x0f000010, 0x0e000000, "cdp", "#y" },
    { 0xfe100090, 0xfc100000, "ldc2", "L#v" },
    { 0x0e100090, 0x0c100000, "ldc", "L#v" },
    { 0xfe100090, 0xfc000000, "stc2", "L#v" },
    { 0x0e100090, 0x0c000000, "stc", "L#v" },
    { 0xf550f000, 0xf550f000, "pld", "ne" },
    { 0x0ff00ff0, 0x01000050, "qadd", "dmn" },
    { 0x0ff00ff0, 0x01400050, "qdadd", "dmn" },
    { 0x0ff00ff0, 0x01600050, "qdsub", "dmn" },
    { 0x0ff00ff0, 0x01200050, "qsub", "dmn" },
    { 0x0ff000f0, 0x01000080, "smlabb", "nmsd" }, // d & n inverted!!
    { 0x0ff000f0, 0x010000a0, "smlatb", "nmsd" }, // d & n inverted!!
    { 0x0ff000f0, 0x010000c0, "smlabt", "nmsd" }, // d & n inverted!!
    { 0x0ff000f0, 0x010000e0, "smlatt", "nmsd" }, // d & n inverted!!
    { 0x0ff000f0, 0x01400080, "smlalbb","ndms" }, // d & n inverted!!
    { 0x0ff000f0, 0x014000a0, "smlaltb","ndms" }, // d & n inverted!!
    { 0x0ff000f0, 0x014000c0, "smlalbt","ndms" }, // d & n inverted!!
    { 0x0ff000f0, 0x014000e0, "smlaltt","ndms" }, // d & n inverted!!
    { 0x0ff000f0, 0x01200080, "smlawb", "nmsd" }, // d & n inverted!!
    { 0x0ff0f0f0, 0x012000a0, "smulwb","nms" }, // d & n inverted!!
    { 0x0ff000f0, 0x012000c0, "smlawt", "nmsd" }, // d & n inverted!!
    { 0x0ff0f0f0, 0x012000e0, "smulwt","nms" }, // d & n inverted!!
    { 0x0ff0f0f0, 0x01600080, "smulbb","nms" }, // d & n inverted!!
    { 0x0ff0f0f0, 0x016000a0, "smultb","nms" }, // d & n inverted!!
    { 0x0ff0f0f0, 0x016000c0, "smulbt","nms" }, // d & n inverted!!
    { 0x0ff0f0f0, 0x016000e0, "smultt","nms" }, // d & n inverted!!
    { 0x00000000, 0x00000000, NULL, NULL }
};
/*
 * Mnemonic fragment tables.  Each table is indexed by a small bit-field
 * extracted from the instruction word by the selector macros below.
 */

/* Condition-code suffixes, indexed by bits 28-31 ("al" prints as nothing). */
static char const arm32_insn_conditions[][4] = {
	"eq", "ne", "cs", "cc",
	"mi", "pl", "vs", "vc",
	"hi", "ls", "ge", "lt",
	"gt", "le", "",   "nv"
};

/* ldm/stm addressing-mode suffixes, indexed by bits 23-24. */
static char const insn_block_transfers[][4] = {
	"da", "ia", "db", "ib"
};

/* ldm/stm stack-style suffixes (see insn_stkblktrans for the index). */
static char const insn_stack_block_transfers[][4] = {
	"ed", "ea", "fd", "fa"
};

/* Shift mnemonics, indexed by bits 5-6. */
static char const op_shifts[][4] = {
	"lsl", "lsr", "asr", "ror"
};

/* FPA rounding-mode suffixes, indexed by bits 5-6. */
static char const insn_fpa_rounding[][2] = {
	"", "p", "m", "z"
};

/* FPA precision suffixes, indexed by a 2-bit precision field. */
static char const insn_fpa_precision[][2] = {
	"s", "d", "e", "p"
};

/* FPA immediate constants, indexed by bits 0-2. */
static char const insn_fpaconstants[][8] = {
	"0.0", "1.0", "2.0", "3.0",
	"4.0", "5.0", "0.5", "10.0"
};

/*
 * Selector macros: map an instruction word to the matching table entry.
 * The argument is fully parenthesised so that any expression may be passed.
 */
#define insn_condition(x)	arm32_insn_conditions[((x) >> 28) & 0x0f]
#define insn_blktrans(x)	insn_block_transfers[((x) >> 23) & 3]
#define insn_stkblktrans(x)	insn_stack_block_transfers[(3*(((x) >> 20)&1))^(((x) >> 23)&3)]
#define op2_shift(x)		op_shifts[((x) >> 5) & 3]
#define insn_fparnd(x)		insn_fpa_rounding[((x) >> 5) & 0x03]
/*
 * The precision index is two bits wide: bit 19 (high) and bit 7 (low) for
 * data operations, bit 22 (high) and bit 15 (low) for transfers.  Each bit
 * is isolated before being combined; the previous form masked the combined
 * value with 1, which threw the high bit away so that "e" and "p" could
 * never be selected.
 */
#define insn_fpaprec(x)		insn_fpa_precision[((((x) >> 18) & 2) | (((x) >> 7) & 1))]
#define insn_fpaprect(x)	insn_fpa_precision[((((x) >> 21) & 2) | (((x) >> 15) & 1))]
#define insn_fpaimm(x)		insn_fpaconstants[(x) & 0x07]
/*
 * Local prototypes.
 *
 * The disasm_* helpers each print one operand kind through the caller's
 * disasm_interface_t; the disassemble_* pair are the callbacks used by the
 * default interface (disassemble_di) defined at the end of this file.
 */
static void disasm_register_shift(const disasm_interface_t *di, u_int insn);
static void disasm_print_reglist(const disasm_interface_t *di, u_int insn);
static void disasm_insn_ldrstr(const disasm_interface_t *di, u_int insn,
u_int loc);
static void disasm_insn_ldrhstrh(const disasm_interface_t *di, u_int insn,
u_int loc);
static void disasm_insn_ldcstc(const disasm_interface_t *di, u_int insn,
u_int loc);
static u_int disassemble_readword(u_int address);
static void disassemble_printaddr(u_int address);
/*
 * disasm --
 *	Disassemble the single instruction at address `loc' and print it,
 *	followed by a newline, through the callbacks in `di'.
 *
 *	di	callbacks used to fetch the instruction word (di_readword),
 *		print text (di_printf) and print addresses (di_printaddr)
 *	loc	address of the instruction; also the base for pc-relative
 *		operands (which are printed as loc + 8 + offset)
 *	altfmt	not used by this implementation
 *
 *	Returns the address of the next instruction (loc + INSN_SIZE).
 *
 *	The instruction is matched against the arm32_i table (first entry
 *	whose mask/pattern fits wins).  The matching entry's format string is
 *	then interpreted one character at a time: upper-case letters append
 *	mnemonic modifiers, every other character prints one operand.
 */
u_int
disasm(const disasm_interface_t *di, u_int loc, int altfmt)
{
	const struct arm32_insn *i_ptr = &arm32_i[0];
	u_int insn;
	int matchp;
	int branch;
	char* f_ptr;
	int fmt;

	fmt = 0;
	matchp = 0;
	insn = di->di_readword(loc);

/*	di->di_printf("loc=%08x insn=%08x : ", loc, insn);*/

	/* Find the first table entry describing this instruction. */
	while (i_ptr->name) {
		if ((insn & i_ptr->mask) == i_ptr->pattern) {
			matchp = 1;
			break;
		}
		i_ptr++;
	}

	if (!matchp) {
		di->di_printf("und%s\t%08x\n", insn_condition(insn), insn);
		return(loc + INSN_SIZE);
	}

	/* If instruction forces condition code, don't print it. */
	if ((i_ptr->mask & 0xf0000000) == 0xf0000000)
		di->di_printf("%s", i_ptr->name);
	else
		di->di_printf("%s%s", i_ptr->name, insn_condition(insn));

	f_ptr = i_ptr->format;

	/* Insert tab if there are no instruction modifiers */
	if (*(f_ptr) < 'A' || *(f_ptr) > 'Z') {
		++fmt;
		di->di_printf("\t");
	}

	while (*f_ptr) {
		switch (*f_ptr) {
		/* 2 - print Operand 2 of a data processing instruction */
		case '2':
			if (insn & 0x02000000) {
				u_int imm = insn & 0xff;
				int rotate = ((insn >> 7) & 0x1e);

				/*
				 * A rotation of 0 leaves the value unchanged.
				 * It must be special-cased: rotating via
				 * "<< (32 - 0)" would shift a 32-bit value by
				 * its full width, which is undefined in C.
				 */
				if (rotate != 0)
					imm = imm << (32 - rotate) |
					    imm >> rotate;
				di->di_printf("#0x%08x", imm);
			} else {
				disasm_register_shift(di, insn);
			}
			break;
		/* d - destination register (bits 12-15) */
		case 'd':
			di->di_printf("r%d", ((insn >> 12) & 0x0f));
			break;
		/* D - insert 'p' if Rd is R15 */
		case 'D':
			if (((insn >> 12) & 0x0f) == 15)
				di->di_printf("p");
			break;
		/* n - n register (bits 16-19) */
		case 'n':
			di->di_printf("r%d", ((insn >> 16) & 0x0f));
			break;
		/* s - s register (bits 8-11) */
		case 's':
			di->di_printf("r%d", ((insn >> 8) & 0x0f));
			break;
		/* o - indirect register rn (bits 16-19) (used by swap) */
		case 'o':
			di->di_printf("[r%d]", ((insn >> 16) & 0x0f));
			break;
		/* m - m register (bits 0-3) */
		case 'm':
			di->di_printf("r%d", ((insn >> 0) & 0x0f));
			break;
		/* a - address operand of ldr/str instruction */
		case 'a':
			disasm_insn_ldrstr(di, insn, loc);
			break;
		/* e - address operand of ldrh/strh instruction */
		case 'e':
			disasm_insn_ldrhstrh(di, insn, loc);
			break;
		/* l - register list for ldm/stm instruction */
		case 'l':
			disasm_print_reglist(di, insn);
			break;
		/* f - 1st fp operand (register) (bits 12-14) */
		case 'f':
			di->di_printf("f%d", (insn >> 12) & 7);
			break;
		/* g - 2nd fp operand (register) (bits 16-18) */
		case 'g':
			di->di_printf("f%d", (insn >> 16) & 7);
			break;
		/*
		 * h - 3rd fp operand (bits 0-3): bit 3 selects an immediate
		 * constant, otherwise bits 0-2 name a register
		 */
		case 'h':
			if (insn & (1 << 3))
				di->di_printf("#%s", insn_fpaimm(insn));
			else
				di->di_printf("f%d", insn & 7);
			break;
		/* j - xtb rotate literal (bits 10-11) */
		case 'j':
			di->di_printf("ror #%d", ((insn >> 10) & 3) << 3);
			break;
		/* b - branch address */
		case 'b':
			branch = ((insn << 2) & 0x03ffffff);
			/* sign-extend the 26-bit byte offset */
			if (branch & 0x02000000)
				branch |= 0xfc000000;
			di->di_printaddr(loc + 8 + branch);
			break;
		/* t - blx address */
		case 't':
			/* bit 24 of the instruction supplies bit 1 of the offset */
			branch = ((insn << 2) & 0x03ffffff) |
			    (insn >> 23 & 0x00000002);
			if (branch & 0x02000000)
				branch |= 0xfc000000;
			di->di_printaddr(loc + 8 + branch);
			break;
		/* X - block transfer type */
		case 'X':
			di->di_printf("%s", insn_blktrans(insn));
			break;
		/* Y - block transfer type (r13 base) */
		case 'Y':
			di->di_printf("%s", insn_stkblktrans(insn));
			break;
		/* c - comment field bits(0-23) */
		case 'c':
			di->di_printf("0x%08x", (insn & 0x00ffffff));
			break;
		/* k - breakpoint comment (bits 0-3, 8-19) */
		case 'k':
			di->di_printf("0x%04x",
			    (insn & 0x000fff00) >> 4 | (insn & 0x0000000f));
			break;
		/* p - saved or current status register */
		case 'p':
			if (insn & 0x00400000)
				di->di_printf("spsr");
			else
				di->di_printf("cpsr");
			break;
		/* F - PSR transfer fields */
		case 'F':
			di->di_printf("_");
			if (insn & (1 << 16))
				di->di_printf("c");
			if (insn & (1 << 17))
				di->di_printf("x");
			if (insn & (1 << 18))
				di->di_printf("s");
			if (insn & (1 << 19))
				di->di_printf("f");
			break;
		/* B - byte transfer flag */
		case 'B':
			if (insn & 0x00400000)
				di->di_printf("b");
			break;
		/* L - co-processor transfer size */
		case 'L':
			if (insn & (1 << 22))
				di->di_printf("l");
			break;
		/* S - set status flag */
		case 'S':
			if (insn & 0x00100000)
				di->di_printf("s");
			break;
		/* P - fp precision */
		case 'P':
			di->di_printf("%s", insn_fpaprec(insn));
			break;
		/* Q - fp precision (for ldf/stf); nothing is printed for it */
		case 'Q':
			break;
		/* R - fp rounding */
		case 'R':
			di->di_printf("%s", insn_fparnd(insn));
			break;
		/* W - writeback flag */
		case 'W':
			if (insn & (1 << 21))
				di->di_printf("!");
			break;
		/* # - co-processor number */
		case '#':
			di->di_printf("p%d", (insn >> 8) & 0x0f);
			break;
		/* v - co-processor data transfer registers+addressing mode */
		case 'v':
			disasm_insn_ldcstc(di, insn, loc);
			break;
		/* x - instruction in hex */
		case 'x':
			di->di_printf("0x%08x", insn);
			break;
		/* y - co-processor data processing registers */
		case 'y':
			di->di_printf("%d, ", (insn >> 20) & 0x0f);
			di->di_printf("c%d, c%d, c%d", (insn >> 12) & 0x0f,
			    (insn >> 16) & 0x0f, insn & 0x0f);
			di->di_printf(", %d", (insn >> 5) & 0x07);
			break;
		/* z - co-processor register transfer registers */
		case 'z':
			di->di_printf("%d, ", (insn >> 21) & 0x07);
			di->di_printf("r%d, c%d, c%d, %d",
			    (insn >> 12) & 0x0f, (insn >> 16) & 0x0f,
			    insn & 0x0f, (insn >> 5) & 0x07);
/*			if (((insn >> 5) & 0x07) != 0)
				di->di_printf(", %d", (insn >> 5) & 0x07);*/
			break;
		default:
			di->di_printf("[%c - unknown]", *f_ptr);
			break;
		}

		/*
		 * Advance to the next format character.  A following
		 * upper-case letter is a mnemonic modifier and gets no
		 * separator; otherwise the first operand is preceded by a tab
		 * and every later one by ", ".
		 */
		if (*(f_ptr+1) >= 'A' && *(f_ptr+1) <= 'Z')
			++f_ptr;
		else if (*(++f_ptr)) {
			++fmt;
			if (fmt == 1)
				di->di_printf("\t");
			else
				di->di_printf(", ");
		}
	}

	di->di_printf("\n");

	return(loc + INSN_SIZE);
}
/*
 * disasm_register_shift --
 *	Print a shifted-register operand: the register in bits 0-3, followed
 *	by the shift described by bits 4-11 (nothing for "lsl #0", "rrx" for
 *	the "ror #0" encoding, a shift-by-register when bit 4 is set, and a
 *	shift-by-immediate otherwise).
 */
static void
disasm_register_shift(const disasm_interface_t *di, u_int insn)
{
	di->di_printf("r%d", (insn & 0x0f));
	if ((insn & 0x00000ff0) == 0)
		;	/* lsl #0: plain register, no shift to print */
	else if ((insn & 0x00000ff0) == 0x00000060)
		di->di_printf(", rrx");
	else {
		if (insn & 0x10)
			di->di_printf(", %s r%d", op2_shift(insn),
			    (insn >> 8) & 0x0f);
		else {
			u_int amount = (insn >> 7) & 0x1f;

			/*
			 * "lsl #0" and "ror #0" were handled above, so an
			 * immediate amount of 0 here can only be lsr or asr,
			 * where the architecture defines 0 to mean a shift
			 * by 32.
			 */
			if (amount == 0)
				amount = 32;
			di->di_printf(", %s #%d", op2_shift(insn), amount);
		}
	}
}
/*
 * disasm_print_reglist --
 *	Print the ldm/stm register list held in bits 0-15 as "{...}",
 *	collapsing consecutive registers into "rA-rB" ranges, and append "^"
 *	when bit 22 is set.
 */
static void
disasm_print_reglist(const disasm_interface_t *di, u_int insn)
{
	int reg;
	int run_first = -1;	/* first register of the open run, -1 if none */
	int need_sep = 0;	/* set once something has been printed */

	di->di_printf("{");

	/* Index 16 is a sentinel that closes a run ending at r15. */
	for (reg = 0; reg <= 16; ++reg) {
		int present = (reg < 16) && (insn & (1 << reg)) != 0;

		if (run_first < 0) {
			if (present)
				run_first = reg;
			continue;
		}
		if (present)
			continue;

		/* The run [run_first, reg - 1] has just ended: emit it. */
		if (need_sep)
			di->di_printf(", ");
		need_sep = 1;

		if (run_first == reg - 1)
			di->di_printf("r%d", run_first);
		else
			di->di_printf("r%d-r%d", run_first, reg - 1);
		run_first = -1;
	}

	di->di_printf("}");

	if (insn & (1 << 22))
		di->di_printf("^");
}
/*
 * disasm_insn_ldrstr --
 *	Print the address operand of an ldr/str instruction.  A pc-based,
 *	pre-indexed immediate access without writeback is printed as the
 *	resolved address; everything else is printed in "[rN, ...]" form.
 */
static void
disasm_insn_ldrstr(const disasm_interface_t *di, u_int insn, u_int loc)
{
	const int offset = insn & 0xfff;
	const int add = (insn & 0x00800000) != 0;	/* offset is added */
	const int pre = (insn & (1 << 24)) != 0;	/* pre-indexed */

	if ((insn & 0x032f0000) == 0x010f0000) {
		/* rA = pc, immediate index */
		u_int target = add ? loc + offset : loc - offset;

		di->di_printaddr(target + 8);
		return;
	}

	di->di_printf("[r%d", (insn >> 16) & 0x0f);

	/* The offset is omitted only for a pre-indexed immediate of zero. */
	if ((insn & 0x03000fff) != 0x01000000) {
		di->di_printf("%s, ", pre ? "" : "]");
		if (!add)
			di->di_printf("-");
		if (insn & (1 << 25))
			disasm_register_shift(di, insn);
		else
			di->di_printf("#0x%03x", offset);
	}

	if (pre)
		di->di_printf("]");
}
/*
 * disasm_insn_ldrhstrh --
 *	Print the address operand of an ldrh/strh-style instruction.  The
 *	8-bit immediate is split across bits 8-11 (high nibble) and bits 0-3
 *	(low nibble); bit 22 selects immediate versus register offset.
 */
static void
disasm_insn_ldrhstrh(const disasm_interface_t *di, u_int insn, u_int loc)
{
	const int offset = ((insn & 0xf00) >> 4) | (insn & 0xf);
	const int add = (insn & 0x00800000) != 0;	/* offset is added */
	const int pre = (insn & (1 << 24)) != 0;	/* pre-indexed */

	if ((insn & 0x004f0000) == 0x004f0000) {
		/* rA = pc, immediate index */
		u_int target = add ? loc + offset : loc - offset;

		di->di_printaddr(target + 8);
		return;
	}

	di->di_printf("[r%d", (insn >> 16) & 0x0f);

	/* The offset is omitted only for a pre-indexed immediate of zero. */
	if ((insn & 0x01400f0f) != 0x01400000) {
		di->di_printf("%s, ", pre ? "" : "]");
		if (!add)
			di->di_printf("-");
		if (insn & (1 << 22))
			di->di_printf("#0x%02x", offset);
		else
			di->di_printf("r%d", (insn & 0x0f));
	}

	if (pre)
		di->di_printf("]");
}
/*
 * disasm_insn_ldcstc --
 *	Print the register and address operand of a co-processor data
 *	transfer.  The register is shown as "fN" for co-processor 1 and as
 *	"cN" otherwise; the 8-bit offset is scaled by 4.  `loc' is unused.
 */
static void
disasm_insn_ldcstc(const disasm_interface_t *di, u_int insn, u_int loc)
{
	const u_int coproc = (insn >> 8) & 0xf;
	const int pre = (insn & (1 << 24)) != 0;	/* pre-indexed */

	if (coproc == 1)
		di->di_printf("f%d, ", (insn >> 12) & 0x07);
	else
		di->di_printf("c%d, ", (insn >> 12) & 0x0f);

	di->di_printf("[r%d", (insn >> 16) & 0x0f);
	di->di_printf("%s, ", pre ? "" : "]");

	if ((insn & (1 << 23)) == 0)
		di->di_printf("-");

	di->di_printf("#0x%03x", (insn & 0xff) << 2);

	if (pre)
		di->di_printf("]");

	if (insn & (1 << 21))
		di->di_printf("!");
}
/*
 * disassemble_readword --
 *	Default di_readword callback: treat `address' as a pointer and
 *	return the word stored there.
 */
static u_int
disassemble_readword(u_int address)
{
	const u_int *word = (const u_int *)address;

	return (*word);
}
/*
 * disassemble_printaddr --
 *	Default di_printaddr callback: write `address' to standard output as
 *	an 8-digit hexadecimal number.
 */
static void
disassemble_printaddr(u_int address)
{
	static const char addr_fmt[] = "0x%08x";

	printf(addr_fmt, address);
}
/*
 * Default interface used by disassemble(): reads instruction words straight
 * from memory and prints with printf.  The initializers are listed in
 * the order di_readword, di_printaddr, di_printf.
 */
static const disasm_interface_t disassemble_di = {
disassemble_readword, disassemble_printaddr, printf
};
/*
 * disassemble --
 *	Disassemble and print the single instruction at `address' using the
 *	default interface.
 */
void
disassemble(u_int address)
{
	const disasm_interface_t *iface = &disassemble_di;

	/* The next-instruction address returned by disasm() is not needed. */
	(void)disasm(iface, address, 0);
}
/* End of disassem.c */
@@ -0,0 +1,65 @@
/* $NetBSD: disassem.h,v 1.4 2001/03/04 04:15:58 matt Exp $ */
/*-
* Copyright (c) 1997 Mark Brinicombe.
* Copyright (c) 1997 Causality Limited.
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Mark Brinicombe.
* 4. The name of the company nor the name of the author may be used to
* endorse or promote products derived from this software without specific
* prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* Define the interface structure required by the disassembler.
*
* $FreeBSD: /repoman/r/ncvs/src/sys/arm/include/disassem.h,v 1.2 2005/01/05 21:58:48 imp Exp $
*/
#ifndef ANDROID_MACHINE_DISASSEM_H
#define ANDROID_MACHINE_DISASSEM_H
#include <sys/types.h>
/* Give the declarations below C linkage when included from C++. */
#if __cplusplus
extern "C" {
#endif
/*
 * Callbacks the disassembler uses to talk to its environment.
 */
typedef struct {
/* Takes an address and returns the instruction word to decode. */
u_int (*di_readword)(u_int);
/* Prints an address operand (e.g. a branch target). */
void (*di_printaddr)(u_int);
/* printf-style output routine used for all other text. */
void (*di_printf)(const char *, ...);
} disasm_interface_t;
/* Prototypes for callable functions */
/*
 * disasm(di, loc, altfmt): disassemble the instruction at loc through the
 * callbacks in di; returns the address of the following instruction.
 */
u_int disasm(const disasm_interface_t *, u_int, int);
/* disassemble(address): disassemble one instruction using a built-in interface. */
void disassemble(u_int);
#if __cplusplus
}
#endif
#endif /* !ANDROID_MACHINE_DISASSEM_H */
@@ -0,0 +1,378 @@
/* libs/pixelflinger/codeflinger/load_store.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
#include <assert.h>
#include <stdio.h>
#include <cutils/log.h>
#include "codeflinger/GGLAssembler.h"
namespace android {
// ----------------------------------------------------------------------------
// Emits the instructions that store pixel register 's' at the address held
// in 'addr'. The access width is taken from addr.size (32, 24, 16 or 8 bits;
// any other size emits nothing). When WRITE_BACK is set in 'flags', the
// address register is advanced past the stored pixel.
void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    const bool advance = (flags & WRITE_BACK) != 0;

    switch (addr.size) {
    case 32:
        if (advance) {
            STR(AL, s.reg, addr.reg, immed12_post(4));
        } else {
            STR(AL, s.reg, addr.reg);
        }
        break;

    case 24:
        // 24 bits formats are a little special and used only for RGB
        // 0x00BBGGRR is unpacked as R,G,B
        // Store the low byte, then rotate the next byte into place.
        for (int byte = 0; byte < 3; ++byte) {
            if (byte != 0) {
                MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
            }
            STRB(AL, s.reg, addr.reg, immed12_pre(byte));
        }
        // Two ROR #8 were applied; ROR #16 completes a full 32-bit
        // rotation and restores the source register if it must survive.
        if ((s.flags & CORRUPTIBLE) == 0) {
            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
        }
        if (advance) {
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        }
        break;

    case 16:
        if (advance) {
            STRH(AL, s.reg, addr.reg, immed8_post(2));
        } else {
            STRH(AL, s.reg, addr.reg);
        }
        break;

    case 8:
        if (advance) {
            STRB(AL, s.reg, addr.reg, immed12_post(1));
        } else {
            STRB(AL, s.reg, addr.reg);
        }
        break;
    }
}
void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
Scratch scratches(registerFile());
int s0;
const int bits = addr.size;
const int inc = (flags & WRITE_BACK)?1:0;
switch (bits) {
case 32:
if (inc) LDR(AL, s.reg, addr.reg, immed12_post(4));
else LDR(AL, s.reg, addr.reg);
break;
case 24:
// 24 bits formats are a little special and used only for RGB
// R,G,B is packed as 0x00BBGGRR
s0 = scratches.obtain();
if (s.reg != addr.reg) {
LDRB(AL, s.reg, addr.reg, immed12_pre(0)); // R
LDRB(AL, s0, addr.reg, immed12_pre(1)); // G
ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
LDRB(AL, s0, addr.reg, immed12_pre(2)); // B
ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
} else {
int s1 = scratches.obtain();
LDRB(AL, s1, addr.reg, immed12_pre(0)); // R
LDRB(AL, s0, addr.reg, immed12_pre(1)); // G
ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
LDRB(AL, s0, addr.reg, immed12_pre(2)); // B
ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
}
if (inc)
ADD(AL, 0, addr.reg, addr.reg, imm(3));
break;
case 16:
if (inc) LDRH(AL, s.reg, addr.reg, immed8_post(2));
else LDRH(AL, s.reg, addr.reg);
break;
case 8:
if (inc) LDRB(AL, s.reg, addr.reg, immed12_post(1));
else LDRB(AL, s.reg, addr.reg);
break;
}
}
// Emits code that isolates bits [l, h) of register 's' and leaves them,
// right-aligned, in d.reg. 'bits' is the width of the packed value: when
// h == bits there is nothing above the field to clear. On return d.s holds
// the field width (h - l).
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

    assert(maskLen<=8);
    assert(h);

    const int dreg = d.reg;

    // Step 1: get rid of any bits above the field.
    if (h != bits) {
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            // component = packed & mask;
            AND(AL, 0, dreg, s, imm(mask));
        } else if (isValidImmediate(~mask)) {
            // component = packed & mask;
            BIC(AL, 0, dreg, s, imm(~mask));
        } else {
            // No usable immediate: shift the field up to the top of the
            // register instead, and account for that in the shift below.
            const int up = 32-h;
            MOV(AL, 0, dreg, reg_imm(s, LSL, up));
            l += up;
            h = 32;
        }
        s = dreg;
    }

    // Step 2: right-align the field.
    if (l != 0) {
        // component = packed >> l;
        MOV(AL, 0, dreg, reg_imm(s, LSR, l));
        s = dreg;
    }

    // Step 3: nothing was emitted above, so a plain copy may be needed.
    if (s != dreg) {
        MOV(AL, 0, dreg, s);
    }

    d.s = maskLen;
}
// Extracts the given component of pixel 's' into integer 'd', using the
// high/low bit positions recorded in the pixel's format.
void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
{
    const int high = s.format.c[component].h;
    const int low  = s.format.c[component].l;

    extract(d, s.reg, high, low, s.size());
}
// Extracts the given component of pixel 's' into component 'd'. The work is
// done on a temporary 32-bit integer_t that targets d's register; 'd' is then
// rebuilt from that result.
void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
{
    const int high = s.format.c[component].h;
    const int low  = s.format.c[component].l;

    integer_t value(d.reg, 32, d.flags);
    extract(value, s.reg, high, low, s.size());
    d = component_t(value);
}
// Expands component 's' to 'dbits' bits in 'd'. A component that does not
// start at bit 0, or whose high bits must be cleared, is first extracted
// into 'd'; otherwise it is expanded directly from its register.
void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
{
    const bool mustExtract = s.l || (s.flags & CLEAR_HI);

    if (!mustExtract) {
        expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
        return;
    }

    extract(d, s.reg, s.h, s.l, 32);
    expand(d, d, dbits);
}
// Expands component 's' to 'dbits' bits in component 'd'. The work is done
// on a temporary 32-bit integer_t that targets d's register; 'd' is then
// rebuilt from that result.
void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
{
    integer_t widened(d.reg, 32, d.flags);

    expand(widened, s, dbits);
    d = component_t(widened);
}
// Emits code that widens the src.size()-bit value in src.reg to 'dbits' bits
// in dst.reg by replicating its bit pattern. 'dst' is updated to describe
// the result (size = dbits, flags cleared). 'dst' and 'src' may be the same
// object.
void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int width = src.size();     // number of valid bits built so far
    int from = src.reg;
    const int to = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    // Already wide enough: at most a register copy is required.
    if (dbits <= width) {
        if (from != to) {
            MOV(AL, 0, to, from);
        }
        return;
    }

    // A single bit expands to all ones or all zeros:
    // d = (s<<dbits) - s;
    if (width == 1) {
        RSB(AL, 0, to, from, reg_imm(from, LSL, dbits));
        return;
    }

    // Target width is not a multiple of the source width: left-justify the
    // value, then fold it downwards, doubling the pattern each time.
    if (dbits % width) {
        // d = s << (dbits-sbits);
        MOV(AL, 0, to, reg_imm(from, LSL, dbits-width));
        dbits -= width;
        do {
            // d |= d >> sbits;
            ORR(AL, 0, to, to, reg_imm(to, LSR, width));
            dbits -= width;
            width *= 2;
        } while (dbits > 0);
        return;
    }

    // Target width is a multiple of the source width: replicate upwards.
    dbits -= width;
    do {
        // d |= d<<sbits;
        ORR(AL, 0, to, from, reg_imm(from, LSL, width));
        from = to;
        dbits -= width;
        if (width*2 < dbits) {
            width *= 2;
        }
    } while (dbits > 0);
}
// Emits code that reduces component 's' (occupying bits [s.l, s.h) of s.reg)
// to the width of the 'component' field of pixel 'd' and merges it into
// d.reg at that field's position. When the source is wider than the
// destination and mDithering is set, the value of 'dither' is added before
// the final shift. If d has the FIRST flag set the result is written with
// MOV directly into d.reg; otherwise it is ORed into d.reg. The FIRST flag
// is cleared on return.
void GGLAssembler::downshift(
pixel_t& d, int component, component_t s, const reg_t& dither)
{
// NOTE(review): 'needs' is not referenced anywhere else in this function.
const needs_t& needs = mBuilderContext.needs;
Scratch scratches(registerFile());
int sh = s.h;
int sl = s.l;
// Masking is only requested when the flag is set AND the field does not
// already touch the corresponding end of the register.
int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
int maskLoBits = (sl!=0) ? ((s.flags & CLEAR_LO)?1:0) : 0;
int sbits = sh - sl;
int dh = d.format.c[component].h;
int dl = d.format.c[component].l;
int dbits = dh - dl;
int dithering = 0;
LOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);
if (sbits>dbits) {
// see if we need to dither
dithering = mDithering;
}
// Pick the register used for intermediate results: d.reg itself for the
// first component written, otherwise the source register when it may be
// clobbered, otherwise a scratch register.
int ireg = d.reg;
if (!(d.flags & FIRST)) {
if (s.flags & CORRUPTIBLE) {
ireg = s.reg;
} else {
ireg = scratches.obtain();
}
}
d.flags &= ~FIRST;
if (maskHiBits) {
// we need to mask the high bits (and possibly the lowbits too)
// and we might be able to use immediate mask.
if (!dithering) {
// we don't do this if we only have maskLoBits because we can
// do it more efficiently below (in the case where dl=0)
const int offset = sh - dbits;
if (dbits<=8 && offset >= 0) {
const uint32_t mask = ((1<<dbits)-1) << offset;
if (isValidImmediate(mask) || isValidImmediate(~mask)) {
build_and_immediate(ireg, s.reg, mask, 32);
// The component is now exactly dbits wide, starting at 'offset'.
sl = offset;
s.reg = ireg;
sbits = dbits;
maskLoBits = maskHiBits = 0;
}
}
} else {
// in the dithering case though, we need to preserve the lower bits
const uint32_t mask = ((1<<sbits)-1) << sl;
if (isValidImmediate(mask) || isValidImmediate(~mask)) {
build_and_immediate(ireg, s.reg, mask, 32);
s.reg = ireg;
maskLoBits = maskHiBits = 0;
}
}
}
// XXX: we could special case (maskHiBits & !maskLoBits)
// like we do for maskLoBits below, but it happens very rarely
// that we have maskHiBits only and the conditions necessary to lead
// to better code (like doing d |= s << 24)
// Still need to clear the high bits (no immediate mask was usable):
// shift the field up so that it ends at bit 32.
if (maskHiBits) {
MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
sl += 32-sh;
sh = 32;
s.reg = ireg;
maskHiBits = 0;
}
// Downsampling should be performed as follows:
// V * ((1<<dbits)-1) / ((1<<sbits)-1)
// V * [(1<<dbits)/((1<<sbits)-1) - 1/((1<<sbits)-1)]
// V * [1/((1<<sbits)-1)>>dbits - 1/((1<<sbits)-1)]
// V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits
// V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits))
//
// By approximating (1>>dbits) and (1>>sbits) to 0:
//
// V>>(sbits-dbits) - V>>sbits
//
// A good approximation is V>>(sbits-dbits),
// but better one (needed for dithering) is:
//
// (V>>(sbits-dbits)<<sbits - V)>>sbits
// (V<<dbits - V)>>sbits
// (V - V>>dbits)>>(sbits-dbits)
// Dithering is done here
if (dithering) {
comment("dithering");
// Right-align the component first so the arithmetic below works on
// a value starting at bit 0.
if (sl) {
MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
sh -= sl;
sl = 0;
s.reg = ireg;
}
// scaling (V-V>>dbits)
SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
// Align the dither value (GGL_DITHER_BITS wide) with the bits that
// are about to be discarded, then add it in.
const int shift = (GGL_DITHER_BITS - (sbits-dbits));
if (shift>0) ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
else if (shift<0) ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
else ADD(AL, 0, ireg, ireg, dither.reg);
s.reg = ireg;
}
// Final placement into d.reg. In every branch below, MOV is used when
// ireg == d.reg and ORR (merging with what d.reg already holds)
// otherwise.
if ((maskLoBits|dithering) && (sh > dbits)) {
// Low bits must be dropped: shift right until only dbits remain, then
// (if the destination field is not at bit 0) shift left into place.
int shift = sh-dbits;
if (dl) {
MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
if (ireg == d.reg) {
MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
} else {
ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
}
} else {
if (ireg == d.reg) {
MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
} else {
ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
}
}
} else {
// A single shift by the difference between the source and destination
// high bit positions (right, left, or none) moves the field into place.
int shift = sh-dh;
if (shift>0) {
if (ireg == d.reg) {
MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
} else {
ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
}
} else if (shift<0) {
if (ireg == d.reg) {
MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
} else {
ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
}
} else {
if (ireg == d.reg) {
if (s.reg != d.reg) {
MOV(AL, 0, d.reg, s.reg);
}
} else {
ORR(AL, 0, d.reg, d.reg, s.reg);
}
}
}
}
}; // namespace android
File diff suppressed because it is too large Load Diff