M7350v1_en_gpl

This commit is contained in:
T
2024-09-09 08:52:07 +00:00
commit f9cc65cfda
65988 changed files with 26357421 additions and 0 deletions
+19
View File
@@ -0,0 +1,19 @@
#
# Makefile for SGI's XP devices.
#
obj-$(CONFIG_SGI_XP) += xp.o
xp-y := xp_main.o
xp-$(CONFIG_IA64_SGI_SN2) += xp_sn2.o xp_nofault.o
xp-$(CONFIG_IA64_GENERIC) += xp_sn2.o xp_nofault.o
xp-$(CONFIG_IA64_SGI_UV) += xp_uv.o
xp-$(CONFIG_X86_64) += xp_uv.o
obj-$(CONFIG_SGI_XP) += xpc.o
xpc-y := xpc_main.o xpc_channel.o xpc_partition.o
xpc-$(CONFIG_IA64_SGI_SN2) += xpc_sn2.o
xpc-$(CONFIG_IA64_GENERIC) += xpc_sn2.o
xpc-$(CONFIG_IA64_SGI_UV) += xpc_uv.o
xpc-$(CONFIG_X86_64) += xpc_uv.o
obj-$(CONFIG_SGI_XP) += xpnet.o
+358
View File
@@ -0,0 +1,358 @@
/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (C) 2004-2008 Silicon Graphics, Inc. All rights reserved.
*/
/*
* External Cross Partition (XP) structures and defines.
*/
#ifndef _DRIVERS_MISC_SGIXP_XP_H
#define _DRIVERS_MISC_SGIXP_XP_H
#include <linux/mutex.h>
#if defined CONFIG_X86_UV || defined CONFIG_IA64_SGI_UV
#include <asm/uv/uv.h>
#define is_uv() is_uv_system()
#endif
#ifndef is_uv
#define is_uv() 0
#endif
#if defined CONFIG_IA64
#include <asm/sn/arch.h> /* defines is_shub1() and is_shub2() */
#define is_shub() ia64_platform_is("sn2")
#endif
#ifndef is_shub1
#define is_shub1() 0
#endif
#ifndef is_shub2
#define is_shub2() 0
#endif
#ifndef is_shub
#define is_shub() 0
#endif
#ifdef USE_DBUG_ON
#define DBUG_ON(condition) BUG_ON(condition)
#else
#define DBUG_ON(condition)
#endif
/*
* Define the maximum number of partitions the system can possibly support.
* It is based on the maximum number of hardware partitionable regions. The
* term 'region' in this context refers to the minimum number of nodes that
* can comprise an access protection grouping. The access protection is in
* regards to memory, IPI and IOI.
*
* The maximum number of hardware partitionable regions is equal to the
* maximum number of nodes in the entire system divided by the minimum number
* of nodes that comprise an access protection grouping.
*/
#define XP_MAX_NPARTITIONS_SN2 64
#define XP_MAX_NPARTITIONS_UV 256
/*
* XPC establishes channel connections between the local partition and any
* other partition that is currently up. Over these channels, kernel-level
* `users' can communicate with their counterparts on the other partitions.
*
* If the need for additional channels arises, one can simply increase
* XPC_MAX_NCHANNELS accordingly. If the day should come where that number
* exceeds the absolute MAXIMUM number of channels possible (eight), then one
* will need to make changes to the XPC code to accommodate for this.
*
* The absolute maximum number of channels possible is limited to eight for
* performance reasons on sn2 hardware. The internal cross partition structures
* require sixteen bytes per channel, and eight allows all of this
* interface-shared info to fit in one 128-byte cacheline.
*/
#define XPC_MEM_CHANNEL 0 /* memory channel number */
#define XPC_NET_CHANNEL 1 /* network channel number */
#define XPC_MAX_NCHANNELS 2 /* max #of channels allowed */
#if XPC_MAX_NCHANNELS > 8
#error XPC_MAX_NCHANNELS exceeds absolute MAXIMUM possible.
#endif
/*
* Define macro, XPC_MSG_SIZE(), is provided for the user
* that wants to fit as many msg entries as possible in a given memory size
* (e.g. a memory page).
*/
#define XPC_MSG_MAX_SIZE 128
#define XPC_MSG_HDR_MAX_SIZE 16
#define XPC_MSG_PAYLOAD_MAX_SIZE (XPC_MSG_MAX_SIZE - XPC_MSG_HDR_MAX_SIZE)
#define XPC_MSG_SIZE(_payload_size) \
ALIGN(XPC_MSG_HDR_MAX_SIZE + (_payload_size), \
is_uv() ? 64 : 128)
/*
* Define the return values and values passed to user's callout functions.
* (It is important to add new value codes at the end just preceding
* xpUnknownReason, which must have the highest numerical value.)
*/
enum xp_retval {
xpSuccess = 0,
xpNotConnected, /* 1: channel is not connected */
xpConnected, /* 2: channel connected (opened) */
xpRETIRED1, /* 3: (formerly xpDisconnected) */
xpMsgReceived, /* 4: message received */
xpMsgDelivered, /* 5: message delivered and acknowledged */
xpRETIRED2, /* 6: (formerly xpTransferFailed) */
xpNoWait, /* 7: operation would require wait */
xpRetry, /* 8: retry operation */
xpTimeout, /* 9: timeout in xpc_allocate_msg_wait() */
xpInterrupted, /* 10: interrupted wait */
xpUnequalMsgSizes, /* 11: message size disparity between sides */
xpInvalidAddress, /* 12: invalid address */
xpNoMemory, /* 13: no memory available for XPC structures */
xpLackOfResources, /* 14: insufficient resources for operation */
xpUnregistered, /* 15: channel is not registered */
xpAlreadyRegistered, /* 16: channel is already registered */
xpPartitionDown, /* 17: remote partition is down */
xpNotLoaded, /* 18: XPC module is not loaded */
xpUnloading, /* 19: this side is unloading XPC module */
xpBadMagic, /* 20: XPC MAGIC string not found */
xpReactivating, /* 21: remote partition was reactivated */
xpUnregistering, /* 22: this side is unregistering channel */
xpOtherUnregistering, /* 23: other side is unregistering channel */
xpCloneKThread, /* 24: cloning kernel thread */
xpCloneKThreadFailed, /* 25: cloning kernel thread failed */
xpNoHeartbeat, /* 26: remote partition has no heartbeat */
xpPioReadError, /* 27: PIO read error */
xpPhysAddrRegFailed, /* 28: registration of phys addr range failed */
xpRETIRED3, /* 29: (formerly xpBteDirectoryError) */
xpRETIRED4, /* 30: (formerly xpBtePoisonError) */
xpRETIRED5, /* 31: (formerly xpBteWriteError) */
xpRETIRED6, /* 32: (formerly xpBteAccessError) */
xpRETIRED7, /* 33: (formerly xpBtePWriteError) */
xpRETIRED8, /* 34: (formerly xpBtePReadError) */
xpRETIRED9, /* 35: (formerly xpBteTimeOutError) */
xpRETIRED10, /* 36: (formerly xpBteXtalkError) */
xpRETIRED11, /* 37: (formerly xpBteNotAvailable) */
xpRETIRED12, /* 38: (formerly xpBteUnmappedError) */
xpBadVersion, /* 39: bad version number */
xpVarsNotSet, /* 40: the XPC variables are not set up */
xpNoRsvdPageAddr, /* 41: unable to get rsvd page's phys addr */
xpInvalidPartid, /* 42: invalid partition ID */
xpLocalPartid, /* 43: local partition ID */
xpOtherGoingDown, /* 44: other side going down, reason unknown */
xpSystemGoingDown, /* 45: system is going down, reason unknown */
xpSystemHalt, /* 46: system is being halted */
xpSystemReboot, /* 47: system is being rebooted */
xpSystemPoweroff, /* 48: system is being powered off */
xpDisconnecting, /* 49: channel disconnecting (closing) */
xpOpenCloseError, /* 50: channel open/close protocol error */
xpDisconnected, /* 51: channel disconnected (closed) */
xpBteCopyError, /* 52: bte_copy() returned error */
xpSalError, /* 53: sn SAL error */
xpRsvdPageNotSet, /* 54: the reserved page is not set up */
xpPayloadTooBig, /* 55: payload too large for message slot */
xpUnsupported, /* 56: unsupported functionality or resource */
xpNeedMoreInfo, /* 57: more info is needed by SAL */
xpGruCopyError, /* 58: gru_copy_gru() returned error */
xpGruSendMqError, /* 59: gru send message queue related error */
xpBadChannelNumber, /* 60: invalid channel number */
xpBadMsgType, /* 61: invalid message type */
xpBiosError, /* 62: BIOS error */
xpUnknownReason /* 63: unknown reason - must be last in enum */
};
/*
* Define the callout function type used by XPC to update the user on
* connection activity and state changes via the user function registered
* by xpc_connect().
*
* Arguments:
*
* reason - reason code.
* partid - partition ID associated with condition.
* ch_number - channel # associated with condition.
* data - pointer to optional data.
* key - pointer to optional user-defined value provided as the "key"
* argument to xpc_connect().
*
* A reason code of xpConnected indicates that a connection has been
* established to the specified partition on the specified channel. The data
* argument indicates the max number of entries allowed in the message queue.
*
* A reason code of xpMsgReceived indicates that a XPC message arrived from
* the specified partition on the specified channel. The data argument
* specifies the address of the message's payload. The user must call
* xpc_received() when finished with the payload.
*
* All other reason codes indicate failure. The data argmument is NULL.
* When a failure reason code is received, one can assume that the channel
* is not connected.
*/
typedef void (*xpc_channel_func) (enum xp_retval reason, short partid,
int ch_number, void *data, void *key);
/*
* Define the callout function type used by XPC to notify the user of
* messages received and delivered via the user function registered by
* xpc_send_notify().
*
* Arguments:
*
* reason - reason code.
* partid - partition ID associated with condition.
* ch_number - channel # associated with condition.
* key - pointer to optional user-defined value provided as the "key"
* argument to xpc_send_notify().
*
* A reason code of xpMsgDelivered indicates that the message was delivered
* to the intended recipient and that they have acknowledged its receipt by
* calling xpc_received().
*
* All other reason codes indicate failure.
*
* NOTE: The user defined function must be callable by an interrupt handler
* and thus cannot block.
*/
typedef void (*xpc_notify_func) (enum xp_retval reason, short partid,
int ch_number, void *key);
/*
* The following is a registration entry. There is a global array of these,
* one per channel. It is used to record the connection registration made
* by the users of XPC. As long as a registration entry exists, for any
* partition that comes up, XPC will attempt to establish a connection on
* that channel. Notification that a connection has been made will occur via
* the xpc_channel_func function.
*
* The 'func' field points to the function to call when aynchronous
* notification is required for such events as: a connection established/lost,
* or an incoming message received, or an error condition encountered. A
* non-NULL 'func' field indicates that there is an active registration for
* the channel.
*/
struct xpc_registration {
struct mutex mutex;
xpc_channel_func func; /* function to call */
void *key; /* pointer to user's key */
u16 nentries; /* #of msg entries in local msg queue */
u16 entry_size; /* message queue's message entry size */
u32 assigned_limit; /* limit on #of assigned kthreads */
u32 idle_limit; /* limit on #of idle kthreads */
} ____cacheline_aligned;
#define XPC_CHANNEL_REGISTERED(_c) (xpc_registrations[_c].func != NULL)
/* the following are valid xpc_send() or xpc_send_notify() flags */
#define XPC_WAIT 0 /* wait flag */
#define XPC_NOWAIT 1 /* no wait flag */
struct xpc_interface {
void (*connect) (int);
void (*disconnect) (int);
enum xp_retval (*send) (short, int, u32, void *, u16);
enum xp_retval (*send_notify) (short, int, u32, void *, u16,
xpc_notify_func, void *);
void (*received) (short, int, void *);
enum xp_retval (*partid_to_nasids) (short, void *);
};
extern struct xpc_interface xpc_interface;
extern void xpc_set_interface(void (*)(int),
void (*)(int),
enum xp_retval (*)(short, int, u32, void *, u16),
enum xp_retval (*)(short, int, u32, void *, u16,
xpc_notify_func, void *),
void (*)(short, int, void *),
enum xp_retval (*)(short, void *));
extern void xpc_clear_interface(void);
extern enum xp_retval xpc_connect(int, xpc_channel_func, void *, u16,
u16, u32, u32);
extern void xpc_disconnect(int);
static inline enum xp_retval
xpc_send(short partid, int ch_number, u32 flags, void *payload,
u16 payload_size)
{
return xpc_interface.send(partid, ch_number, flags, payload,
payload_size);
}
static inline enum xp_retval
xpc_send_notify(short partid, int ch_number, u32 flags, void *payload,
u16 payload_size, xpc_notify_func func, void *key)
{
return xpc_interface.send_notify(partid, ch_number, flags, payload,
payload_size, func, key);
}
static inline void
xpc_received(short partid, int ch_number, void *payload)
{
return xpc_interface.received(partid, ch_number, payload);
}
static inline enum xp_retval
xpc_partid_to_nasids(short partid, void *nasids)
{
return xpc_interface.partid_to_nasids(partid, nasids);
}
extern short xp_max_npartitions;
extern short xp_partition_id;
extern u8 xp_region_size;
extern unsigned long (*xp_pa) (void *);
extern unsigned long (*xp_socket_pa) (unsigned long);
extern enum xp_retval (*xp_remote_memcpy) (unsigned long, const unsigned long,
size_t);
extern int (*xp_cpu_to_nasid) (int);
extern enum xp_retval (*xp_expand_memprotect) (unsigned long, unsigned long);
extern enum xp_retval (*xp_restrict_memprotect) (unsigned long, unsigned long);
extern u64 xp_nofault_PIOR_target;
extern int xp_nofault_PIOR(void *);
extern int xp_error_PIOR(void);
extern struct device *xp;
extern enum xp_retval xp_init_sn2(void);
extern enum xp_retval xp_init_uv(void);
extern void xp_exit_sn2(void);
extern void xp_exit_uv(void);
#endif /* _DRIVERS_MISC_SGIXP_XP_H */
+286
View File
@@ -0,0 +1,286 @@
/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
*/
/*
* Cross Partition (XP) base.
*
* XP provides a base from which its users can interact
* with XPC, yet not be dependent on XPC.
*
*/
#include <linux/module.h>
#include <linux/device.h>
#include "xp.h"
/* define the XP debug device structures to be used with dev_dbg() et al */
struct device_driver xp_dbg_name = {
.name = "xp"
};
struct device xp_dbg_subname = {
.init_name = "", /* set to "" */
.driver = &xp_dbg_name
};
struct device *xp = &xp_dbg_subname;
/* max #of partitions possible */
short xp_max_npartitions;
EXPORT_SYMBOL_GPL(xp_max_npartitions);
short xp_partition_id;
EXPORT_SYMBOL_GPL(xp_partition_id);
u8 xp_region_size;
EXPORT_SYMBOL_GPL(xp_region_size);
unsigned long (*xp_pa) (void *addr);
EXPORT_SYMBOL_GPL(xp_pa);
unsigned long (*xp_socket_pa) (unsigned long gpa);
EXPORT_SYMBOL_GPL(xp_socket_pa);
enum xp_retval (*xp_remote_memcpy) (unsigned long dst_gpa,
const unsigned long src_gpa, size_t len);
EXPORT_SYMBOL_GPL(xp_remote_memcpy);
int (*xp_cpu_to_nasid) (int cpuid);
EXPORT_SYMBOL_GPL(xp_cpu_to_nasid);
enum xp_retval (*xp_expand_memprotect) (unsigned long phys_addr,
unsigned long size);
EXPORT_SYMBOL_GPL(xp_expand_memprotect);
enum xp_retval (*xp_restrict_memprotect) (unsigned long phys_addr,
unsigned long size);
EXPORT_SYMBOL_GPL(xp_restrict_memprotect);
/*
* xpc_registrations[] keeps track of xpc_connect()'s done by the kernel-level
* users of XPC.
*/
struct xpc_registration xpc_registrations[XPC_MAX_NCHANNELS];
EXPORT_SYMBOL_GPL(xpc_registrations);
/*
* Initialize the XPC interface to indicate that XPC isn't loaded.
*/
static enum xp_retval
xpc_notloaded(void)
{
return xpNotLoaded;
}
struct xpc_interface xpc_interface = {
(void (*)(int))xpc_notloaded,
(void (*)(int))xpc_notloaded,
(enum xp_retval(*)(short, int, u32, void *, u16))xpc_notloaded,
(enum xp_retval(*)(short, int, u32, void *, u16, xpc_notify_func,
void *))xpc_notloaded,
(void (*)(short, int, void *))xpc_notloaded,
(enum xp_retval(*)(short, void *))xpc_notloaded
};
EXPORT_SYMBOL_GPL(xpc_interface);
/*
* XPC calls this when it (the XPC module) has been loaded.
*/
void
xpc_set_interface(void (*connect) (int),
void (*disconnect) (int),
enum xp_retval (*send) (short, int, u32, void *, u16),
enum xp_retval (*send_notify) (short, int, u32, void *, u16,
xpc_notify_func, void *),
void (*received) (short, int, void *),
enum xp_retval (*partid_to_nasids) (short, void *))
{
xpc_interface.connect = connect;
xpc_interface.disconnect = disconnect;
xpc_interface.send = send;
xpc_interface.send_notify = send_notify;
xpc_interface.received = received;
xpc_interface.partid_to_nasids = partid_to_nasids;
}
EXPORT_SYMBOL_GPL(xpc_set_interface);
/*
* XPC calls this when it (the XPC module) is being unloaded.
*/
void
xpc_clear_interface(void)
{
xpc_interface.connect = (void (*)(int))xpc_notloaded;
xpc_interface.disconnect = (void (*)(int))xpc_notloaded;
xpc_interface.send = (enum xp_retval(*)(short, int, u32, void *, u16))
xpc_notloaded;
xpc_interface.send_notify = (enum xp_retval(*)(short, int, u32, void *,
u16, xpc_notify_func,
void *))xpc_notloaded;
xpc_interface.received = (void (*)(short, int, void *))
xpc_notloaded;
xpc_interface.partid_to_nasids = (enum xp_retval(*)(short, void *))
xpc_notloaded;
}
EXPORT_SYMBOL_GPL(xpc_clear_interface);
/*
* Register for automatic establishment of a channel connection whenever
* a partition comes up.
*
* Arguments:
*
* ch_number - channel # to register for connection.
* func - function to call for asynchronous notification of channel
* state changes (i.e., connection, disconnection, error) and
* the arrival of incoming messages.
* key - pointer to optional user-defined value that gets passed back
* to the user on any callouts made to func.
* payload_size - size in bytes of the XPC message's payload area which
* contains a user-defined message. The user should make
* this large enough to hold their largest message.
* nentries - max #of XPC message entries a message queue can contain.
* The actual number, which is determined when a connection
* is established and may be less then requested, will be
* passed to the user via the xpConnected callout.
* assigned_limit - max number of kthreads allowed to be processing
* messages (per connection) at any given instant.
* idle_limit - max number of kthreads allowed to be idle at any given
* instant.
*/
enum xp_retval
xpc_connect(int ch_number, xpc_channel_func func, void *key, u16 payload_size,
u16 nentries, u32 assigned_limit, u32 idle_limit)
{
struct xpc_registration *registration;
DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
DBUG_ON(payload_size == 0 || nentries == 0);
DBUG_ON(func == NULL);
DBUG_ON(assigned_limit == 0 || idle_limit > assigned_limit);
if (XPC_MSG_SIZE(payload_size) > XPC_MSG_MAX_SIZE)
return xpPayloadTooBig;
registration = &xpc_registrations[ch_number];
if (mutex_lock_interruptible(&registration->mutex) != 0)
return xpInterrupted;
/* if XPC_CHANNEL_REGISTERED(ch_number) */
if (registration->func != NULL) {
mutex_unlock(&registration->mutex);
return xpAlreadyRegistered;
}
/* register the channel for connection */
registration->entry_size = XPC_MSG_SIZE(payload_size);
registration->nentries = nentries;
registration->assigned_limit = assigned_limit;
registration->idle_limit = idle_limit;
registration->key = key;
registration->func = func;
mutex_unlock(&registration->mutex);
xpc_interface.connect(ch_number);
return xpSuccess;
}
EXPORT_SYMBOL_GPL(xpc_connect);
/*
* Remove the registration for automatic connection of the specified channel
* when a partition comes up.
*
* Before returning this xpc_disconnect() will wait for all connections on the
* specified channel have been closed/torndown. So the caller can be assured
* that they will not be receiving any more callouts from XPC to their
* function registered via xpc_connect().
*
* Arguments:
*
* ch_number - channel # to unregister.
*/
void
xpc_disconnect(int ch_number)
{
struct xpc_registration *registration;
DBUG_ON(ch_number < 0 || ch_number >= XPC_MAX_NCHANNELS);
registration = &xpc_registrations[ch_number];
/*
* We've decided not to make this a down_interruptible(), since we
* figured XPC's users will just turn around and call xpc_disconnect()
* again anyways, so we might as well wait, if need be.
*/
mutex_lock(&registration->mutex);
/* if !XPC_CHANNEL_REGISTERED(ch_number) */
if (registration->func == NULL) {
mutex_unlock(&registration->mutex);
return;
}
/* remove the connection registration for the specified channel */
registration->func = NULL;
registration->key = NULL;
registration->nentries = 0;
registration->entry_size = 0;
registration->assigned_limit = 0;
registration->idle_limit = 0;
xpc_interface.disconnect(ch_number);
mutex_unlock(&registration->mutex);
return;
}
EXPORT_SYMBOL_GPL(xpc_disconnect);
int __init
xp_init(void)
{
enum xp_retval ret;
int ch_number;
/* initialize the connection registration mutex */
for (ch_number = 0; ch_number < XPC_MAX_NCHANNELS; ch_number++)
mutex_init(&xpc_registrations[ch_number].mutex);
if (is_shub())
ret = xp_init_sn2();
else if (is_uv())
ret = xp_init_uv();
else
ret = 0;
if (ret != xpSuccess)
return ret;
return 0;
}
module_init(xp_init);
void __exit
xp_exit(void)
{
if (is_shub())
xp_exit_sn2();
else if (is_uv())
xp_exit_uv();
}
module_exit(xp_exit);
MODULE_AUTHOR("Silicon Graphics, Inc.");
MODULE_DESCRIPTION("Cross Partition (XP) base");
MODULE_LICENSE("GPL");
+35
View File
@@ -0,0 +1,35 @@
/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
*/
/*
* The xp_nofault_PIOR function takes a pointer to a remote PIO register
* and attempts to load and consume a value from it. This function
* will be registered as a nofault code block. In the event that the
* PIO read fails, the MCA handler will force the error to look
* corrected and vector to the xp_error_PIOR which will return an error.
*
* The definition of "consumption" and the time it takes for an MCA
* to surface is processor implementation specific. This code
* is sufficient on Itanium through the Montvale processor family.
* It may need to be adjusted for future processor implementations.
*
* extern int xp_nofault_PIOR(void *remote_register);
*/
.global xp_nofault_PIOR
xp_nofault_PIOR:
mov r8=r0 // Stage a success return value
ld8.acq r9=[r32];; // PIO Read the specified register
adds r9=1,r9;; // Add to force consumption
srlz.i;; // Allow time for MCA to surface
br.ret.sptk.many b0;; // Return success
.global xp_error_PIOR
xp_error_PIOR:
mov r8=1 // Return value of 1
br.ret.sptk.many b0;; // Return failure
+190
View File
@@ -0,0 +1,190 @@
/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
*/
/*
* Cross Partition (XP) sn2-based functions.
*
* Architecture specific implementation of common functions.
*/
#include <linux/module.h>
#include <linux/device.h>
#include <asm/sn/bte.h>
#include <asm/sn/sn_sal.h>
#include "xp.h"
/*
* The export of xp_nofault_PIOR needs to happen here since it is defined
* in drivers/misc/sgi-xp/xp_nofault.S. The target of the nofault read is
* defined here.
*/
EXPORT_SYMBOL_GPL(xp_nofault_PIOR);
u64 xp_nofault_PIOR_target;
EXPORT_SYMBOL_GPL(xp_nofault_PIOR_target);
/*
* Register a nofault code region which performs a cross-partition PIO read.
* If the PIO read times out, the MCA handler will consume the error and
* return to a kernel-provided instruction to indicate an error. This PIO read
* exists because it is guaranteed to timeout if the destination is down
* (amo operations do not timeout on at least some CPUs on Shubs <= v1.2,
* which unfortunately we have to work around).
*/
static enum xp_retval
xp_register_nofault_code_sn2(void)
{
int ret;
u64 func_addr;
u64 err_func_addr;
func_addr = *(u64 *)xp_nofault_PIOR;
err_func_addr = *(u64 *)xp_error_PIOR;
ret = sn_register_nofault_code(func_addr, err_func_addr, err_func_addr,
1, 1);
if (ret != 0) {
dev_err(xp, "can't register nofault code, error=%d\n", ret);
return xpSalError;
}
/*
* Setup the nofault PIO read target. (There is no special reason why
* SH_IPI_ACCESS was selected.)
*/
if (is_shub1())
xp_nofault_PIOR_target = SH1_IPI_ACCESS;
else if (is_shub2())
xp_nofault_PIOR_target = SH2_IPI_ACCESS0;
return xpSuccess;
}
static void
xp_unregister_nofault_code_sn2(void)
{
u64 func_addr = *(u64 *)xp_nofault_PIOR;
u64 err_func_addr = *(u64 *)xp_error_PIOR;
/* unregister the PIO read nofault code region */
(void)sn_register_nofault_code(func_addr, err_func_addr,
err_func_addr, 1, 0);
}
/*
* Convert a virtual memory address to a physical memory address.
*/
static unsigned long
xp_pa_sn2(void *addr)
{
return __pa(addr);
}
/*
* Convert a global physical to a socket physical address.
*/
static unsigned long
xp_socket_pa_sn2(unsigned long gpa)
{
return gpa;
}
/*
* Wrapper for bte_copy().
*
* dst_pa - physical address of the destination of the transfer.
* src_pa - physical address of the source of the transfer.
* len - number of bytes to transfer from source to destination.
*
* Note: xp_remote_memcpy_sn2() should never be called while holding a spinlock.
*/
static enum xp_retval
xp_remote_memcpy_sn2(unsigned long dst_pa, const unsigned long src_pa,
size_t len)
{
bte_result_t ret;
ret = bte_copy(src_pa, dst_pa, len, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
if (ret == BTE_SUCCESS)
return xpSuccess;
if (is_shub2()) {
dev_err(xp, "bte_copy() on shub2 failed, error=0x%x dst_pa="
"0x%016lx src_pa=0x%016lx len=%ld\\n", ret, dst_pa,
src_pa, len);
} else {
dev_err(xp, "bte_copy() failed, error=%d dst_pa=0x%016lx "
"src_pa=0x%016lx len=%ld\\n", ret, dst_pa, src_pa, len);
}
return xpBteCopyError;
}
static int
xp_cpu_to_nasid_sn2(int cpuid)
{
return cpuid_to_nasid(cpuid);
}
static enum xp_retval
xp_expand_memprotect_sn2(unsigned long phys_addr, unsigned long size)
{
u64 nasid_array = 0;
int ret;
ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1,
&nasid_array);
if (ret != 0) {
dev_err(xp, "sn_change_memprotect(,, "
"SN_MEMPROT_ACCESS_CLASS_1,) failed ret=%d\n", ret);
return xpSalError;
}
return xpSuccess;
}
static enum xp_retval
xp_restrict_memprotect_sn2(unsigned long phys_addr, unsigned long size)
{
u64 nasid_array = 0;
int ret;
ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0,
&nasid_array);
if (ret != 0) {
dev_err(xp, "sn_change_memprotect(,, "
"SN_MEMPROT_ACCESS_CLASS_0,) failed ret=%d\n", ret);
return xpSalError;
}
return xpSuccess;
}
enum xp_retval
xp_init_sn2(void)
{
BUG_ON(!is_shub());
xp_max_npartitions = XP_MAX_NPARTITIONS_SN2;
xp_partition_id = sn_partition_id;
xp_region_size = sn_region_size;
xp_pa = xp_pa_sn2;
xp_socket_pa = xp_socket_pa_sn2;
xp_remote_memcpy = xp_remote_memcpy_sn2;
xp_cpu_to_nasid = xp_cpu_to_nasid_sn2;
xp_expand_memprotect = xp_expand_memprotect_sn2;
xp_restrict_memprotect = xp_restrict_memprotect_sn2;
return xp_register_nofault_code_sn2();
}
void
xp_exit_sn2(void)
{
BUG_ON(!is_shub());
xp_unregister_nofault_code_sn2();
}
+171
View File
@@ -0,0 +1,171 @@
/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved.
*/
/*
* Cross Partition (XP) uv-based functions.
*
* Architecture specific implementation of common functions.
*
*/
#include <linux/device.h>
#include <asm/uv/uv_hub.h>
#if defined CONFIG_X86_64
#include <asm/uv/bios.h>
#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
#include <asm/sn/sn_sal.h>
#endif
#include "../sgi-gru/grukservices.h"
#include "xp.h"
/*
* Convert a virtual memory address to a physical memory address.
*/
static unsigned long
xp_pa_uv(void *addr)
{
return uv_gpa(addr);
}
/*
* Convert a global physical to socket physical address.
*/
static unsigned long
xp_socket_pa_uv(unsigned long gpa)
{
return uv_gpa_to_soc_phys_ram(gpa);
}
static enum xp_retval
xp_remote_mmr_read(unsigned long dst_gpa, const unsigned long src_gpa,
size_t len)
{
int ret;
unsigned long *dst_va = __va(uv_gpa_to_soc_phys_ram(dst_gpa));
BUG_ON(!uv_gpa_in_mmr_space(src_gpa));
BUG_ON(len != 8);
ret = gru_read_gpa(dst_va, src_gpa);
if (ret == 0)
return xpSuccess;
dev_err(xp, "gru_read_gpa() failed, dst_gpa=0x%016lx src_gpa=0x%016lx "
"len=%ld\n", dst_gpa, src_gpa, len);
return xpGruCopyError;
}
static enum xp_retval
xp_remote_memcpy_uv(unsigned long dst_gpa, const unsigned long src_gpa,
size_t len)
{
int ret;
if (uv_gpa_in_mmr_space(src_gpa))
return xp_remote_mmr_read(dst_gpa, src_gpa, len);
ret = gru_copy_gpa(dst_gpa, src_gpa, len);
if (ret == 0)
return xpSuccess;
dev_err(xp, "gru_copy_gpa() failed, dst_gpa=0x%016lx src_gpa=0x%016lx "
"len=%ld\n", dst_gpa, src_gpa, len);
return xpGruCopyError;
}
static int
xp_cpu_to_nasid_uv(int cpuid)
{
/* ??? Is this same as sn2 nasid in mach/part bitmaps set up by SAL? */
return UV_PNODE_TO_NASID(uv_cpu_to_pnode(cpuid));
}
static enum xp_retval
xp_expand_memprotect_uv(unsigned long phys_addr, unsigned long size)
{
int ret;
#if defined CONFIG_X86_64
ret = uv_bios_change_memprotect(phys_addr, size, UV_MEMPROT_ALLOW_RW);
if (ret != BIOS_STATUS_SUCCESS) {
dev_err(xp, "uv_bios_change_memprotect(,, "
"UV_MEMPROT_ALLOW_RW) failed, ret=%d\n", ret);
return xpBiosError;
}
#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
u64 nasid_array;
ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1,
&nasid_array);
if (ret != 0) {
dev_err(xp, "sn_change_memprotect(,, "
"SN_MEMPROT_ACCESS_CLASS_1,) failed ret=%d\n", ret);
return xpSalError;
}
#else
#error not a supported configuration
#endif
return xpSuccess;
}
static enum xp_retval
xp_restrict_memprotect_uv(unsigned long phys_addr, unsigned long size)
{
int ret;
#if defined CONFIG_X86_64
ret = uv_bios_change_memprotect(phys_addr, size,
UV_MEMPROT_RESTRICT_ACCESS);
if (ret != BIOS_STATUS_SUCCESS) {
dev_err(xp, "uv_bios_change_memprotect(,, "
"UV_MEMPROT_RESTRICT_ACCESS) failed, ret=%d\n", ret);
return xpBiosError;
}
#elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
u64 nasid_array;
ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0,
&nasid_array);
if (ret != 0) {
dev_err(xp, "sn_change_memprotect(,, "
"SN_MEMPROT_ACCESS_CLASS_0,) failed ret=%d\n", ret);
return xpSalError;
}
#else
#error not a supported configuration
#endif
return xpSuccess;
}
enum xp_retval
xp_init_uv(void)
{
BUG_ON(!is_uv());
xp_max_npartitions = XP_MAX_NPARTITIONS_UV;
xp_partition_id = sn_partition_id;
xp_region_size = sn_region_size;
xp_pa = xp_pa_uv;
xp_socket_pa = xp_socket_pa_uv;
xp_remote_memcpy = xp_remote_memcpy_uv;
xp_cpu_to_nasid = xp_cpu_to_nasid_uv;
xp_expand_memprotect = xp_expand_memprotect_uv;
xp_restrict_memprotect = xp_restrict_memprotect_uv;
return xpSuccess;
}
void
xp_exit_uv(void)
{
BUG_ON(!is_uv());
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+541
View File
@@ -0,0 +1,541 @@
/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
*/
/*
* Cross Partition Communication (XPC) partition support.
*
* This is the part of XPC that detects the presence/absence of
* other partitions. It provides a heartbeat and monitors the
* heartbeats of other partitions.
*
*/
#include <linux/device.h>
#include <linux/hardirq.h>
#include <linux/slab.h>
#include "xpc.h"
#include <asm/uv/uv_hub.h>
/* XPC is exiting flag */
int xpc_exiting;
/* this partition's reserved page pointers */
struct xpc_rsvd_page *xpc_rsvd_page;
static unsigned long *xpc_part_nasids;
unsigned long *xpc_mach_nasids;
static int xpc_nasid_mask_nbytes; /* #of bytes in nasid mask */
int xpc_nasid_mask_nlongs; /* #of longs in nasid mask */
struct xpc_partition *xpc_partitions;
/*
* Guarantee that the kmalloc'd memory is cacheline aligned.
*/
void *
xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
{
/* see if kmalloc will give us cachline aligned memory by default */
*base = kmalloc(size, flags);
if (*base == NULL)
return NULL;
if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
return *base;
kfree(*base);
/* nope, we'll have to do it ourselves */
*base = kmalloc(size + L1_CACHE_BYTES, flags);
if (*base == NULL)
return NULL;
return (void *)L1_CACHE_ALIGN((u64)*base);
}
/*
* Given a nasid, get the physical address of the partition's reserved page
* for that nasid. This function returns 0 on any error.
*/
static unsigned long
xpc_get_rsvd_page_pa(int nasid)
{
enum xp_retval ret;
u64 cookie = 0;
unsigned long rp_pa = nasid; /* seed with nasid */
size_t len = 0;
size_t buf_len = 0;
void *buf = buf;
void *buf_base = NULL;
enum xp_retval (*get_partition_rsvd_page_pa)
(void *, u64 *, unsigned long *, size_t *) =
xpc_arch_ops.get_partition_rsvd_page_pa;
while (1) {
/* !!! rp_pa will need to be _gpa on UV.
* ??? So do we save it into the architecture specific parts
* ??? of the xpc_partition structure? Do we rename this
* ??? function or have two versions? Rename rp_pa for UV to
* ??? rp_gpa?
*/
ret = get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, &len);
dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, "
"address=0x%016lx, len=0x%016lx\n", ret,
(unsigned long)cookie, rp_pa, len);
if (ret != xpNeedMoreInfo)
break;
/* !!! L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */
if (is_shub())
len = L1_CACHE_ALIGN(len);
if (len > buf_len) {
if (buf_base != NULL)
kfree(buf_base);
buf_len = L1_CACHE_ALIGN(len);
buf = xpc_kmalloc_cacheline_aligned(buf_len, GFP_KERNEL,
&buf_base);
if (buf_base == NULL) {
dev_err(xpc_part, "unable to kmalloc "
"len=0x%016lx\n", buf_len);
ret = xpNoMemory;
break;
}
}
ret = xp_remote_memcpy(xp_pa(buf), rp_pa, len);
if (ret != xpSuccess) {
dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret);
break;
}
}
kfree(buf_base);
if (ret != xpSuccess)
rp_pa = 0;
dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
return rp_pa;
}
/*
* Fill the partition reserved page with the information needed by
* other partitions to discover we are alive and establish initial
* communications.
*/
int
xpc_setup_rsvd_page(void)
{
int ret;
struct xpc_rsvd_page *rp;
unsigned long rp_pa;
unsigned long new_ts_jiffies;
/* get the local reserved page's address */
preempt_disable();
rp_pa = xpc_get_rsvd_page_pa(xp_cpu_to_nasid(smp_processor_id()));
preempt_enable();
if (rp_pa == 0) {
dev_err(xpc_part, "SAL failed to locate the reserved page\n");
return -ESRCH;
}
rp = (struct xpc_rsvd_page *)__va(xp_socket_pa(rp_pa));
if (rp->SAL_version < 3) {
/* SAL_versions < 3 had a SAL_partid defined as a u8 */
rp->SAL_partid &= 0xff;
}
BUG_ON(rp->SAL_partid != xp_partition_id);
if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) {
dev_err(xpc_part, "the reserved page's partid of %d is outside "
"supported range (< 0 || >= %d)\n", rp->SAL_partid,
xp_max_npartitions);
return -EINVAL;
}
rp->version = XPC_RP_VERSION;
rp->max_npartitions = xp_max_npartitions;
/* establish the actual sizes of the nasid masks */
if (rp->SAL_version == 1) {
/* SAL_version 1 didn't set the nasids_size field */
rp->SAL_nasids_size = 128;
}
xpc_nasid_mask_nbytes = rp->SAL_nasids_size;
xpc_nasid_mask_nlongs = BITS_TO_LONGS(rp->SAL_nasids_size *
BITS_PER_BYTE);
/* setup the pointers to the various items in the reserved page */
xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
ret = xpc_arch_ops.setup_rsvd_page(rp);
if (ret != 0)
return ret;
/*
* Set timestamp of when reserved page was setup by XPC.
* This signifies to the remote partition that our reserved
* page is initialized.
*/
new_ts_jiffies = jiffies;
if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies)
new_ts_jiffies++;
rp->ts_jiffies = new_ts_jiffies;
xpc_rsvd_page = rp;
return 0;
}
void
xpc_teardown_rsvd_page(void)
{
/* a zero timestamp indicates our rsvd page is not initialized */
xpc_rsvd_page->ts_jiffies = 0;
}
/*
* Get a copy of a portion of the remote partition's rsvd page.
*
* remote_rp points to a buffer that is cacheline aligned for BTE copies and
* is large enough to contain a copy of their reserved page header and
* part_nasids mask.
*/
enum xp_retval
xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids,
struct xpc_rsvd_page *remote_rp, unsigned long *remote_rp_pa)
{
int l;
enum xp_retval ret;
/* get the reserved page's physical address */
*remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
if (*remote_rp_pa == 0)
return xpNoRsvdPageAddr;
/* pull over the reserved page header and part_nasids mask */
ret = xp_remote_memcpy(xp_pa(remote_rp), *remote_rp_pa,
XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes);
if (ret != xpSuccess)
return ret;
if (discovered_nasids != NULL) {
unsigned long *remote_part_nasids =
XPC_RP_PART_NASIDS(remote_rp);
for (l = 0; l < xpc_nasid_mask_nlongs; l++)
discovered_nasids[l] |= remote_part_nasids[l];
}
/* zero timestamp indicates the reserved page has not been setup */
if (remote_rp->ts_jiffies == 0)
return xpRsvdPageNotSet;
if (XPC_VERSION_MAJOR(remote_rp->version) !=
XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
return xpBadVersion;
}
/* check that both remote and local partids are valid for each side */
if (remote_rp->SAL_partid < 0 ||
remote_rp->SAL_partid >= xp_max_npartitions ||
remote_rp->max_npartitions <= xp_partition_id) {
return xpInvalidPartid;
}
if (remote_rp->SAL_partid == xp_partition_id)
return xpLocalPartid;
return xpSuccess;
}
/*
* See if the other side has responded to a partition deactivate request
* from us. Though we requested the remote partition to deactivate with regard
* to us, we really only need to wait for the other side to disengage from us.
*/
int
xpc_partition_disengaged(struct xpc_partition *part)
{
short partid = XPC_PARTID(part);
int disengaged;
disengaged = !xpc_arch_ops.partition_engaged(partid);
if (part->disengage_timeout) {
if (!disengaged) {
if (time_is_after_jiffies(part->disengage_timeout)) {
/* timelimit hasn't been reached yet */
return 0;
}
/*
* Other side hasn't responded to our deactivate
* request in a timely fashion, so assume it's dead.
*/
dev_info(xpc_part, "deactivate request to remote "
"partition %d timed out\n", partid);
xpc_disengage_timedout = 1;
xpc_arch_ops.assume_partition_disengaged(partid);
disengaged = 1;
}
part->disengage_timeout = 0;
/* cancel the timer function, provided it's not us */
if (!in_interrupt())
del_singleshot_timer_sync(&part->disengage_timer);
DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING &&
part->act_state != XPC_P_AS_INACTIVE);
if (part->act_state != XPC_P_AS_INACTIVE)
xpc_wakeup_channel_mgr(part);
xpc_arch_ops.cancel_partition_deactivation_request(part);
}
return disengaged;
}
/*
* Mark specified partition as active.
*/
enum xp_retval
xpc_mark_partition_active(struct xpc_partition *part)
{
unsigned long irq_flags;
enum xp_retval ret;
dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
spin_lock_irqsave(&part->act_lock, irq_flags);
if (part->act_state == XPC_P_AS_ACTIVATING) {
part->act_state = XPC_P_AS_ACTIVE;
ret = xpSuccess;
} else {
DBUG_ON(part->reason == xpSuccess);
ret = part->reason;
}
spin_unlock_irqrestore(&part->act_lock, irq_flags);
return ret;
}
/*
* Start the process of deactivating the specified partition.
*/
void
xpc_deactivate_partition(const int line, struct xpc_partition *part,
enum xp_retval reason)
{
unsigned long irq_flags;
spin_lock_irqsave(&part->act_lock, irq_flags);
if (part->act_state == XPC_P_AS_INACTIVE) {
XPC_SET_REASON(part, reason, line);
spin_unlock_irqrestore(&part->act_lock, irq_flags);
if (reason == xpReactivating) {
/* we interrupt ourselves to reactivate partition */
xpc_arch_ops.request_partition_reactivation(part);
}
return;
}
if (part->act_state == XPC_P_AS_DEACTIVATING) {
if ((part->reason == xpUnloading && reason != xpUnloading) ||
reason == xpReactivating) {
XPC_SET_REASON(part, reason, line);
}
spin_unlock_irqrestore(&part->act_lock, irq_flags);
return;
}
part->act_state = XPC_P_AS_DEACTIVATING;
XPC_SET_REASON(part, reason, line);
spin_unlock_irqrestore(&part->act_lock, irq_flags);
/* ask remote partition to deactivate with regard to us */
xpc_arch_ops.request_partition_deactivation(part);
/* set a timelimit on the disengage phase of the deactivation request */
part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ);
part->disengage_timer.expires = part->disengage_timeout;
add_timer(&part->disengage_timer);
dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
XPC_PARTID(part), reason);
xpc_partition_going_down(part, reason);
}
/*
* Mark specified partition as inactive.
*/
void
xpc_mark_partition_inactive(struct xpc_partition *part)
{
unsigned long irq_flags;
dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
XPC_PARTID(part));
spin_lock_irqsave(&part->act_lock, irq_flags);
part->act_state = XPC_P_AS_INACTIVE;
spin_unlock_irqrestore(&part->act_lock, irq_flags);
part->remote_rp_pa = 0;
}
/*
* SAL has provided a partition and machine mask. The partition mask
* contains a bit for each even nasid in our partition. The machine
* mask contains a bit for each even nasid in the entire machine.
*
* Using those two bit arrays, we can determine which nasids are
* known in the machine. Each should also have a reserved page
* initialized if they are available for partitioning.
*/
void
xpc_discovery(void)
{
void *remote_rp_base;
struct xpc_rsvd_page *remote_rp;
unsigned long remote_rp_pa;
int region;
int region_size;
int max_regions;
int nasid;
struct xpc_rsvd_page *rp;
unsigned long *discovered_nasids;
enum xp_retval ret;
remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
xpc_nasid_mask_nbytes,
GFP_KERNEL, &remote_rp_base);
if (remote_rp == NULL)
return;
discovered_nasids = kzalloc(sizeof(long) * xpc_nasid_mask_nlongs,
GFP_KERNEL);
if (discovered_nasids == NULL) {
kfree(remote_rp_base);
return;
}
rp = (struct xpc_rsvd_page *)xpc_rsvd_page;
/*
* The term 'region' in this context refers to the minimum number of
* nodes that can comprise an access protection grouping. The access
* protection is in regards to memory, IOI and IPI.
*/
region_size = xp_region_size;
if (is_uv())
max_regions = 256;
else {
max_regions = 64;
switch (region_size) {
case 128:
max_regions *= 2;
case 64:
max_regions *= 2;
case 32:
max_regions *= 2;
region_size = 16;
DBUG_ON(!is_shub2());
}
}
for (region = 0; region < max_regions; region++) {
if (xpc_exiting)
break;
dev_dbg(xpc_part, "searching region %d\n", region);
for (nasid = (region * region_size * 2);
nasid < ((region + 1) * region_size * 2); nasid += 2) {
if (xpc_exiting)
break;
dev_dbg(xpc_part, "checking nasid %d\n", nasid);
if (test_bit(nasid / 2, xpc_part_nasids)) {
dev_dbg(xpc_part, "PROM indicates Nasid %d is "
"part of the local partition; skipping "
"region\n", nasid);
break;
}
if (!(test_bit(nasid / 2, xpc_mach_nasids))) {
dev_dbg(xpc_part, "PROM indicates Nasid %d was "
"not on Numa-Link network at reset\n",
nasid);
continue;
}
if (test_bit(nasid / 2, discovered_nasids)) {
dev_dbg(xpc_part, "Nasid %d is part of a "
"partition which was previously "
"discovered\n", nasid);
continue;
}
/* pull over the rsvd page header & part_nasids mask */
ret = xpc_get_remote_rp(nasid, discovered_nasids,
remote_rp, &remote_rp_pa);
if (ret != xpSuccess) {
dev_dbg(xpc_part, "unable to get reserved page "
"from nasid %d, reason=%d\n", nasid,
ret);
if (ret == xpLocalPartid)
break;
continue;
}
xpc_arch_ops.request_partition_activation(remote_rp,
remote_rp_pa, nasid);
}
}
kfree(discovered_nasids);
kfree(remote_rp_base);
}
/*
* Given a partid, get the nasids owned by that partition from the
* remote partition's reserved page.
*/
enum xp_retval
xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
{
struct xpc_partition *part;
unsigned long part_nasid_pa;
part = &xpc_partitions[partid];
if (part->remote_rp_pa == 0)
return xpPartitionDown;
memset(nasid_mask, 0, xpc_nasid_mask_nbytes);
part_nasid_pa = (unsigned long)XPC_RP_PART_NASIDS(part->remote_rp_pa);
return xp_remote_memcpy(xp_pa(nasid_mask), part_nasid_pa,
xpc_nasid_mask_nbytes);
}
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+607
View File
@@ -0,0 +1,607 @@
/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (C) 1999-2009 Silicon Graphics, Inc. All rights reserved.
*/
/*
* Cross Partition Network Interface (XPNET) support
*
* XPNET provides a virtual network layered on top of the Cross
* Partition communication layer.
*
* XPNET provides direct point-to-point and broadcast-like support
* for an ethernet-like device. The ethernet broadcast medium is
* replaced with a point-to-point message structure which passes
* pointers to a DMA-capable block that a remote partition should
* retrieve and pass to the upper level networking layer.
*
*/
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include "xp.h"
/*
* The message payload transferred by XPC.
*
* buf_pa is the physical address where the DMA should pull from.
*
* NOTE: for performance reasons, buf_pa should _ALWAYS_ begin on a
* cacheline boundary. To accomplish this, we record the number of
* bytes from the beginning of the first cacheline to the first useful
* byte of the skb (leadin_ignore) and the number of bytes from the
* last useful byte of the skb to the end of the last cacheline
* (tailout_ignore).
*
* size is the number of bytes to transfer which includes the skb->len
* (useful bytes of the senders skb) plus the leadin and tailout
*/
struct xpnet_message {
u16 version; /* Version for this message */
u16 embedded_bytes; /* #of bytes embedded in XPC message */
u32 magic; /* Special number indicating this is xpnet */
unsigned long buf_pa; /* phys address of buffer to retrieve */
u32 size; /* #of bytes in buffer */
u8 leadin_ignore; /* #of bytes to ignore at the beginning */
u8 tailout_ignore; /* #of bytes to ignore at the end */
unsigned char data; /* body of small packets */
};
/*
* Determine the size of our message, the cacheline aligned size,
* and then the number of message will request from XPC.
*
* XPC expects each message to exist in an individual cacheline.
*/
#define XPNET_MSG_SIZE XPC_MSG_PAYLOAD_MAX_SIZE
#define XPNET_MSG_DATA_MAX \
(XPNET_MSG_SIZE - offsetof(struct xpnet_message, data))
#define XPNET_MSG_NENTRIES (PAGE_SIZE / XPC_MSG_MAX_SIZE)
#define XPNET_MAX_KTHREADS (XPNET_MSG_NENTRIES + 1)
#define XPNET_MAX_IDLE_KTHREADS (XPNET_MSG_NENTRIES + 1)
/*
* Version number of XPNET implementation. XPNET can always talk to versions
* with same major #, and never talk to versions with a different version.
*/
#define _XPNET_VERSION(_major, _minor) (((_major) << 4) | (_minor))
#define XPNET_VERSION_MAJOR(_v) ((_v) >> 4)
#define XPNET_VERSION_MINOR(_v) ((_v) & 0xf)
#define XPNET_VERSION _XPNET_VERSION(1, 0) /* version 1.0 */
#define XPNET_VERSION_EMBED _XPNET_VERSION(1, 1) /* version 1.1 */
#define XPNET_MAGIC 0x88786984 /* "XNET" */
#define XPNET_VALID_MSG(_m) \
((XPNET_VERSION_MAJOR(_m->version) == XPNET_VERSION_MAJOR(XPNET_VERSION)) \
&& (msg->magic == XPNET_MAGIC))
#define XPNET_DEVICE_NAME "xp0"
/*
* When messages are queued with xpc_send_notify, a kmalloc'd buffer
* of the following type is passed as a notification cookie. When the
* notification function is called, we use the cookie to decide
* whether all outstanding message sends have completed. The skb can
* then be released.
*/
struct xpnet_pending_msg {
struct sk_buff *skb;
atomic_t use_count;
};
struct net_device *xpnet_device;
/*
* When we are notified of other partitions activating, we add them to
* our bitmask of partitions to which we broadcast.
*/
static unsigned long *xpnet_broadcast_partitions;
/* protect above */
static DEFINE_SPINLOCK(xpnet_broadcast_lock);
/*
* Since the Block Transfer Engine (BTE) is being used for the transfer
* and it relies upon cache-line size transfers, we need to reserve at
* least one cache-line for head and tail alignment. The BTE is
* limited to 8MB transfers.
*
* Testing has shown that changing MTU to greater than 64KB has no effect
* on TCP as the two sides negotiate a Max Segment Size that is limited
* to 64K. Other protocols May use packets greater than this, but for
* now, the default is 64KB.
*/
#define XPNET_MAX_MTU (0x800000UL - L1_CACHE_BYTES)
/* 32KB has been determined to be the ideal */
#define XPNET_DEF_MTU (0x8000UL)
/*
* The partid is encapsulated in the MAC address beginning in the following
* octet and it consists of two octets.
*/
#define XPNET_PARTID_OCTET 2
/* Define the XPNET debug device structures to be used with dev_dbg() et al */
struct device_driver xpnet_dbg_name = {
.name = "xpnet"
};
struct device xpnet_dbg_subname = {
.init_name = "", /* set to "" */
.driver = &xpnet_dbg_name
};
struct device *xpnet = &xpnet_dbg_subname;
/*
* Packet was recevied by XPC and forwarded to us.
*/
static void
xpnet_receive(short partid, int channel, struct xpnet_message *msg)
{
struct sk_buff *skb;
void *dst;
enum xp_retval ret;
if (!XPNET_VALID_MSG(msg)) {
/*
* Packet with a different XPC version. Ignore.
*/
xpc_received(partid, channel, (void *)msg);
xpnet_device->stats.rx_errors++;
return;
}
dev_dbg(xpnet, "received 0x%lx, %d, %d, %d\n", msg->buf_pa, msg->size,
msg->leadin_ignore, msg->tailout_ignore);
/* reserve an extra cache line */
skb = dev_alloc_skb(msg->size + L1_CACHE_BYTES);
if (!skb) {
dev_err(xpnet, "failed on dev_alloc_skb(%d)\n",
msg->size + L1_CACHE_BYTES);
xpc_received(partid, channel, (void *)msg);
xpnet_device->stats.rx_errors++;
return;
}
/*
* The allocated skb has some reserved space.
* In order to use xp_remote_memcpy(), we need to get the
* skb->data pointer moved forward.
*/
skb_reserve(skb, (L1_CACHE_BYTES - ((u64)skb->data &
(L1_CACHE_BYTES - 1)) +
msg->leadin_ignore));
/*
* Update the tail pointer to indicate data actually
* transferred.
*/
skb_put(skb, (msg->size - msg->leadin_ignore - msg->tailout_ignore));
/*
* Move the data over from the other side.
*/
if ((XPNET_VERSION_MINOR(msg->version) == 1) &&
(msg->embedded_bytes != 0)) {
dev_dbg(xpnet, "copying embedded message. memcpy(0x%p, 0x%p, "
"%lu)\n", skb->data, &msg->data,
(size_t)msg->embedded_bytes);
skb_copy_to_linear_data(skb, &msg->data,
(size_t)msg->embedded_bytes);
} else {
dst = (void *)((u64)skb->data & ~(L1_CACHE_BYTES - 1));
dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t"
"xp_remote_memcpy(0x%p, 0x%p, %hu)\n", dst,
(void *)msg->buf_pa, msg->size);
ret = xp_remote_memcpy(xp_pa(dst), msg->buf_pa, msg->size);
if (ret != xpSuccess) {
/*
* !!! Need better way of cleaning skb. Currently skb
* !!! appears in_use and we can't just call
* !!! dev_kfree_skb.
*/
dev_err(xpnet, "xp_remote_memcpy(0x%p, 0x%p, 0x%hx) "
"returned error=0x%x\n", dst,
(void *)msg->buf_pa, msg->size, ret);
xpc_received(partid, channel, (void *)msg);
xpnet_device->stats.rx_errors++;
return;
}
}
dev_dbg(xpnet, "<skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
"skb->end=0x%p skb->len=%d\n", (void *)skb->head,
(void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
skb->len);
skb->protocol = eth_type_trans(skb, xpnet_device);
skb->ip_summed = CHECKSUM_UNNECESSARY;
dev_dbg(xpnet, "passing skb to network layer\n"
"\tskb->head=0x%p skb->data=0x%p skb->tail=0x%p "
"skb->end=0x%p skb->len=%d\n",
(void *)skb->head, (void *)skb->data, skb_tail_pointer(skb),
skb_end_pointer(skb), skb->len);
xpnet_device->stats.rx_packets++;
xpnet_device->stats.rx_bytes += skb->len + ETH_HLEN;
netif_rx_ni(skb);
xpc_received(partid, channel, (void *)msg);
}
/*
* This is the handler which XPC calls during any sort of change in
* state or message reception on a connection.
*/
static void
xpnet_connection_activity(enum xp_retval reason, short partid, int channel,
void *data, void *key)
{
DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
DBUG_ON(channel != XPC_NET_CHANNEL);
switch (reason) {
case xpMsgReceived: /* message received */
DBUG_ON(data == NULL);
xpnet_receive(partid, channel, (struct xpnet_message *)data);
break;
case xpConnected: /* connection completed to a partition */
spin_lock_bh(&xpnet_broadcast_lock);
__set_bit(partid, xpnet_broadcast_partitions);
spin_unlock_bh(&xpnet_broadcast_lock);
netif_carrier_on(xpnet_device);
dev_dbg(xpnet, "%s connected to partition %d\n",
xpnet_device->name, partid);
break;
default:
spin_lock_bh(&xpnet_broadcast_lock);
__clear_bit(partid, xpnet_broadcast_partitions);
spin_unlock_bh(&xpnet_broadcast_lock);
if (bitmap_empty((unsigned long *)xpnet_broadcast_partitions,
xp_max_npartitions)) {
netif_carrier_off(xpnet_device);
}
dev_dbg(xpnet, "%s disconnected from partition %d\n",
xpnet_device->name, partid);
break;
}
}
static int
xpnet_dev_open(struct net_device *dev)
{
enum xp_retval ret;
dev_dbg(xpnet, "calling xpc_connect(%d, 0x%p, NULL, %ld, %ld, %ld, "
"%ld)\n", XPC_NET_CHANNEL, xpnet_connection_activity,
(unsigned long)XPNET_MSG_SIZE,
(unsigned long)XPNET_MSG_NENTRIES,
(unsigned long)XPNET_MAX_KTHREADS,
(unsigned long)XPNET_MAX_IDLE_KTHREADS);
ret = xpc_connect(XPC_NET_CHANNEL, xpnet_connection_activity, NULL,
XPNET_MSG_SIZE, XPNET_MSG_NENTRIES,
XPNET_MAX_KTHREADS, XPNET_MAX_IDLE_KTHREADS);
if (ret != xpSuccess) {
dev_err(xpnet, "ifconfig up of %s failed on XPC connect, "
"ret=%d\n", dev->name, ret);
return -ENOMEM;
}
dev_dbg(xpnet, "ifconfig up of %s; XPC connected\n", dev->name);
return 0;
}
static int
xpnet_dev_stop(struct net_device *dev)
{
xpc_disconnect(XPC_NET_CHANNEL);
dev_dbg(xpnet, "ifconfig down of %s; XPC disconnected\n", dev->name);
return 0;
}
static int
xpnet_dev_change_mtu(struct net_device *dev, int new_mtu)
{
/* 68 comes from min TCP+IP+MAC header */
if ((new_mtu < 68) || (new_mtu > XPNET_MAX_MTU)) {
dev_err(xpnet, "ifconfig %s mtu %d failed; value must be "
"between 68 and %ld\n", dev->name, new_mtu,
XPNET_MAX_MTU);
return -EINVAL;
}
dev->mtu = new_mtu;
dev_dbg(xpnet, "ifconfig %s mtu set to %d\n", dev->name, new_mtu);
return 0;
}
/*
* Notification that the other end has received the message and
* DMA'd the skb information. At this point, they are done with
* our side. When all recipients are done processing, we
* release the skb and then release our pending message structure.
*/
static void
xpnet_send_completed(enum xp_retval reason, short partid, int channel,
void *__qm)
{
struct xpnet_pending_msg *queued_msg = (struct xpnet_pending_msg *)__qm;
DBUG_ON(queued_msg == NULL);
dev_dbg(xpnet, "message to %d notified with reason %d\n",
partid, reason);
if (atomic_dec_return(&queued_msg->use_count) == 0) {
dev_dbg(xpnet, "all acks for skb->head=-x%p\n",
(void *)queued_msg->skb->head);
dev_kfree_skb_any(queued_msg->skb);
kfree(queued_msg);
}
}
static void
xpnet_send(struct sk_buff *skb, struct xpnet_pending_msg *queued_msg,
u64 start_addr, u64 end_addr, u16 embedded_bytes, int dest_partid)
{
u8 msg_buffer[XPNET_MSG_SIZE];
struct xpnet_message *msg = (struct xpnet_message *)&msg_buffer;
u16 msg_size = sizeof(struct xpnet_message);
enum xp_retval ret;
msg->embedded_bytes = embedded_bytes;
if (unlikely(embedded_bytes != 0)) {
msg->version = XPNET_VERSION_EMBED;
dev_dbg(xpnet, "calling memcpy(0x%p, 0x%p, 0x%lx)\n",
&msg->data, skb->data, (size_t)embedded_bytes);
skb_copy_from_linear_data(skb, &msg->data,
(size_t)embedded_bytes);
msg_size += embedded_bytes - 1;
} else {
msg->version = XPNET_VERSION;
}
msg->magic = XPNET_MAGIC;
msg->size = end_addr - start_addr;
msg->leadin_ignore = (u64)skb->data - start_addr;
msg->tailout_ignore = end_addr - (u64)skb_tail_pointer(skb);
msg->buf_pa = xp_pa((void *)start_addr);
dev_dbg(xpnet, "sending XPC message to %d:%d\n"
"msg->buf_pa=0x%lx, msg->size=%u, "
"msg->leadin_ignore=%u, msg->tailout_ignore=%u\n",
dest_partid, XPC_NET_CHANNEL, msg->buf_pa, msg->size,
msg->leadin_ignore, msg->tailout_ignore);
atomic_inc(&queued_msg->use_count);
ret = xpc_send_notify(dest_partid, XPC_NET_CHANNEL, XPC_NOWAIT, msg,
msg_size, xpnet_send_completed, queued_msg);
if (unlikely(ret != xpSuccess))
atomic_dec(&queued_msg->use_count);
}
/*
* Network layer has formatted a packet (skb) and is ready to place it
* "on the wire". Prepare and send an xpnet_message to all partitions
* which have connected with us and are targets of this packet.
*
* MAC-NOTE: For the XPNET driver, the MAC address contains the
* destination partid. If the destination partid octets are 0xffff,
* this packet is to be broadcast to all connected partitions.
*/
static int
xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct xpnet_pending_msg *queued_msg;
u64 start_addr, end_addr;
short dest_partid;
u16 embedded_bytes = 0;
dev_dbg(xpnet, ">skb->head=0x%p skb->data=0x%p skb->tail=0x%p "
"skb->end=0x%p skb->len=%d\n", (void *)skb->head,
(void *)skb->data, skb_tail_pointer(skb), skb_end_pointer(skb),
skb->len);
if (skb->data[0] == 0x33) {
dev_kfree_skb(skb);
return NETDEV_TX_OK; /* nothing needed to be done */
}
/*
* The xpnet_pending_msg tracks how many outstanding
* xpc_send_notifies are relying on this skb. When none
* remain, release the skb.
*/
queued_msg = kmalloc(sizeof(struct xpnet_pending_msg), GFP_ATOMIC);
if (queued_msg == NULL) {
dev_warn(xpnet, "failed to kmalloc %ld bytes; dropping "
"packet\n", sizeof(struct xpnet_pending_msg));
dev->stats.tx_errors++;
dev_kfree_skb(skb);
return NETDEV_TX_OK;
}
/* get the beginning of the first cacheline and end of last */
start_addr = ((u64)skb->data & ~(L1_CACHE_BYTES - 1));
end_addr = L1_CACHE_ALIGN((u64)skb_tail_pointer(skb));
/* calculate how many bytes to embed in the XPC message */
if (unlikely(skb->len <= XPNET_MSG_DATA_MAX)) {
/* skb->data does fit so embed */
embedded_bytes = skb->len;
}
/*
* Since the send occurs asynchronously, we set the count to one
* and begin sending. Any sends that happen to complete before
* we are done sending will not free the skb. We will be left
* with that task during exit. This also handles the case of
* a packet destined for a partition which is no longer up.
*/
atomic_set(&queued_msg->use_count, 1);
queued_msg->skb = skb;
if (skb->data[0] == 0xff) {
/* we are being asked to broadcast to all partitions */
for_each_set_bit(dest_partid, xpnet_broadcast_partitions,
xp_max_npartitions) {
xpnet_send(skb, queued_msg, start_addr, end_addr,
embedded_bytes, dest_partid);
}
} else {
dest_partid = (short)skb->data[XPNET_PARTID_OCTET + 1];
dest_partid |= (short)skb->data[XPNET_PARTID_OCTET + 0] << 8;
if (dest_partid >= 0 &&
dest_partid < xp_max_npartitions &&
test_bit(dest_partid, xpnet_broadcast_partitions) != 0) {
xpnet_send(skb, queued_msg, start_addr, end_addr,
embedded_bytes, dest_partid);
}
}
dev->stats.tx_packets++;
dev->stats.tx_bytes += skb->len;
if (atomic_dec_return(&queued_msg->use_count) == 0) {
dev_kfree_skb(skb);
kfree(queued_msg);
}
return NETDEV_TX_OK;
}
/*
* Deal with transmit timeouts coming from the network layer.
*/
static void
xpnet_dev_tx_timeout(struct net_device *dev)
{
dev->stats.tx_errors++;
}
static const struct net_device_ops xpnet_netdev_ops = {
.ndo_open = xpnet_dev_open,
.ndo_stop = xpnet_dev_stop,
.ndo_start_xmit = xpnet_dev_hard_start_xmit,
.ndo_change_mtu = xpnet_dev_change_mtu,
.ndo_tx_timeout = xpnet_dev_tx_timeout,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
};
static int __init
xpnet_init(void)
{
int result;
if (!is_shub() && !is_uv())
return -ENODEV;
dev_info(xpnet, "registering network device %s\n", XPNET_DEVICE_NAME);
xpnet_broadcast_partitions = kzalloc(BITS_TO_LONGS(xp_max_npartitions) *
sizeof(long), GFP_KERNEL);
if (xpnet_broadcast_partitions == NULL)
return -ENOMEM;
/*
* use ether_setup() to init the majority of our device
* structure and then override the necessary pieces.
*/
xpnet_device = alloc_netdev(0, XPNET_DEVICE_NAME, ether_setup);
if (xpnet_device == NULL) {
kfree(xpnet_broadcast_partitions);
return -ENOMEM;
}
netif_carrier_off(xpnet_device);
xpnet_device->netdev_ops = &xpnet_netdev_ops;
xpnet_device->mtu = XPNET_DEF_MTU;
/*
* Multicast assumes the LSB of the first octet is set for multicast
* MAC addresses. We chose the first octet of the MAC to be unlikely
* to collide with any vendor's officially issued MAC.
*/
xpnet_device->dev_addr[0] = 0x02; /* locally administered, no OUI */
xpnet_device->dev_addr[XPNET_PARTID_OCTET + 1] = xp_partition_id;
xpnet_device->dev_addr[XPNET_PARTID_OCTET + 0] = (xp_partition_id >> 8);
/*
* ether_setup() sets this to a multicast device. We are
* really not supporting multicast at this time.
*/
xpnet_device->flags &= ~IFF_MULTICAST;
/*
* No need to checksum as it is a DMA transfer. The BTE will
* report an error if the data is not retrievable and the
* packet will be dropped.
*/
xpnet_device->features = NETIF_F_HW_CSUM;
result = register_netdev(xpnet_device);
if (result != 0) {
free_netdev(xpnet_device);
kfree(xpnet_broadcast_partitions);
}
return result;
}
module_init(xpnet_init);
static void __exit
xpnet_exit(void)
{
dev_info(xpnet, "unregistering network device %s\n",
xpnet_device[0].name);
unregister_netdev(xpnet_device);
free_netdev(xpnet_device);
kfree(xpnet_broadcast_partitions);
}
module_exit(xpnet_exit);
MODULE_AUTHOR("Silicon Graphics, Inc.");
MODULE_DESCRIPTION("Cross Partition Network adapter (XPNET)");
MODULE_LICENSE("GPL");