[prev in list] [next in list] [prev in thread] [next in thread] 

List:       linux-sparc
Subject:    [sparc] The iommu rewrite
From:       Pete Zaitcev <zaitcev () redhat ! com>
Date:       2003-04-29 17:53:56
[Download RAW message or body]

The iommu rewrite is basically complete. You are free
to uncover your eyes and look. I may yet tweak something
when I do the sun4d support.

This drags ioport cleanups and fixes with it, because they
overlap textually and I was too lazy to split them. But those
should be good too.

The patch removes 9 XXXs and adds 7.

-- Pete

diff -urN -X dontdiff linux-2.5.68-bk5/arch/sparc/kernel/ioport.c \
                linux-2.5.68-bk5-sparc/arch/sparc/kernel/ioport.c
--- linux-2.5.68-bk5/arch/sparc/kernel/ioport.c	2003-03-24 14:01:48.000000000 -0800
+++ linux-2.5.68-bk5-sparc/arch/sparc/kernel/ioport.c	2003-04-28 22:33:26.000000000 \
-0700 @@ -62,13 +62,6 @@
 };
 
 /*
- * BTFIXUP would do as well but it seems overkill for the case.
- */
-static void (*_sparc_mapioaddr)(unsigned long pa, unsigned long va,
-    int bus, int ro);
-static void (*_sparc_unmapioaddr)(unsigned long va);
-
-/*
  * Our mini-allocator...
  * Boy this is gross! We need it because we must map I/O for
  * timers and interrupt controller before the kmalloc is available.
@@ -201,8 +194,6 @@
 _sparc_ioremap(struct resource *res, u32 bus, u32 pa, int sz)
 {
 	unsigned long offset = ((unsigned long) pa) & (~PAGE_MASK);
-	unsigned long va;
-	unsigned int psz;
 
 	if (allocate_resource(&sparc_iomap, res,
 	    (offset + sz + PAGE_SIZE-1) & PAGE_MASK,
@@ -213,27 +204,10 @@
 		prom_halt();
 	}
 
-	va = res->start;
 	pa &= PAGE_MASK;
-	for (psz = res->end - res->start + 1; psz != 0; psz -= PAGE_SIZE) {
-		(*_sparc_mapioaddr)(pa, va, bus, 0);
-		va += PAGE_SIZE;
-		pa += PAGE_SIZE;
-	}
+	sparc_mapiorange(bus, pa, res->start, res->end - res->start + 1);
 
-	/*
-	 * XXX Playing with implementation details here.
-	 * On sparc64 Ebus has resources with precise boundaries.
-	 * We share drivers with sparc64. Too clever drivers use
-	 * start of a resource instead of a base address.
-	 *
-	 * XXX-2 This may be not valid anymore, clean when
-	 * interface to sbus_ioremap() is resolved.
-	 */
-	res->start += offset;
-	res->end = res->start + sz - 1;		/* not strictly necessary.. */
-
-	return (void *) res->start;
+	return (void *) (res->start + offset);
 }
 
 /*
@@ -244,12 +218,8 @@
 	unsigned long plen;
 
 	plen = res->end - res->start + 1;
-	plen = (plen + PAGE_SIZE-1) & PAGE_MASK;
-	while (plen != 0) {
-		plen -= PAGE_SIZE;
-		(*_sparc_unmapioaddr)(res->start + plen);
-	}
-
+	if ((plen & (PAGE_SIZE-1)) != 0) BUG();
+	sparc_unmapiorange(res->start, plen);
 	release_resource(res);
 }
 
@@ -283,40 +253,44 @@
 	}
 
 	order = get_order(len_total);
-	va = __get_free_pages(GFP_KERNEL, order);
-	if (va == 0) {
-		/*
-		 * printk here may be flooding... Consider removal XXX.
-		 */
-		printk("sbus_alloc_consistent: no %ld pages\n", len_total>>PAGE_SHIFT);
-		return NULL;
-	}
+	if ((va = __get_free_pages(GFP_KERNEL, order)) == 0)
+		goto err_nopages;
 
-	if ((res = kmalloc(sizeof(struct resource), GFP_KERNEL)) == NULL) {
-		free_pages(va, order);
-		printk("sbus_alloc_consistent: no core\n");
-		return NULL;
-	}
+	if ((res = kmalloc(sizeof(struct resource), GFP_KERNEL)) == NULL)
+		goto err_nomem;
 	memset((char*)res, 0, sizeof(struct resource));
 
 	if (allocate_resource(&_sparc_dvma, res, len_total,
 	    _sparc_dvma.start, _sparc_dvma.end, PAGE_SIZE, NULL, NULL) != 0) {
 		printk("sbus_alloc_consistent: cannot occupy 0x%lx", len_total);
-		free_pages(va, order);
-		kfree(res);
-		return NULL;
+		goto err_nova;
 	}
+	mmu_inval_dma_area(va, len_total);
+	// XXX The mmu_map_dma_area does this for us below, see comments.
+	// sparc_mapiorange(0, virt_to_phys(va), res->start, len_total);
+	/*
+	 * XXX That's where sdev would be used. Currently we load
+	 * all iommu tables with the same translations.
+	 */
+	if (mmu_map_dma_area(dma_addrp, va, res->start, len_total) != 0)
+		goto err_noiommu;
 
-	mmu_map_dma_area(va, res->start, len_total);
-
-	*dma_addrp = res->start;
 	return (void *)res->start;
+
+err_noiommu:
+	release_resource(res);
+err_nova:
+	free_pages(va, order);
+err_nomem:
+	kfree(res);
+err_nopages:
+	return NULL;
 }
 
 void sbus_free_consistent(struct sbus_dev *sdev, long n, void *p, u32 ba)
 {
 	struct resource *res;
-	unsigned long pgp;
+	struct page *pgv;
 
 	if ((res = _sparc_find_resource(&_sparc_dvma,
 	    (unsigned long)p)) == NULL) {
@@ -340,10 +314,10 @@
 	kfree(res);
 
 	/* mmu_inval_dma_area(va, n); */ /* it's consistent, isn't it */
-	pgp = (unsigned long) phys_to_virt(mmu_translate_dvma(ba));
+	pgv = mmu_translate_dvma(ba);
 	mmu_unmap_dma_area(ba, n);
 
-	free_pages(pgp, get_order(n));
+	__free_pages(pgv, get_order(n));
 }
 
 /*
@@ -353,39 +327,6 @@
  */
 dma_addr_t sbus_map_single(struct sbus_dev *sdev, void *va, size_t len, int \
direction)  {
-#if 0 /* This is the version that abuses consistent space */
-	unsigned long len_total = (len + PAGE_SIZE-1) & PAGE_MASK;
-	struct resource *res;
-
-	/* XXX why are some lenghts signed, others unsigned? */
-	if (len <= 0) {
-		return 0;
-	}
-	/* XXX So what is maxphys for us and how do drivers know it? */
-	if (len > 256*1024) {			/* __get_free_pages() limit */
-		return 0;
-	}
-
-	if ((res = kmalloc(sizeof(struct resource), GFP_KERNEL)) == NULL) {
-		printk("sbus_map_single: no core\n");
-		return 0;
-	}
-	memset((char*)res, 0, sizeof(struct resource));
-	res->name = va; /* XXX */
-
-	if (allocate_resource(&_sparc_dvma, res, len_total,
-	    _sparc_dvma.start, _sparc_dvma.end, PAGE_SIZE) != 0) {
-		printk("sbus_map_single: cannot occupy 0x%lx", len);
-		kfree(res);
-		return 0;
-	}
-
-	mmu_map_dma_area(va, res->start, len_total);
-	mmu_flush_dma_area((unsigned long)va, len_total); /* in all contexts? */
-
-	return res->start;
-#endif
-#if 1 /* "trampoline" version */
 	/* XXX why are some lenghts signed, others unsigned? */
 	if (len <= 0) {
 		return 0;
@@ -395,36 +336,11 @@
 		return 0;
 	}
 	return mmu_get_scsi_one(va, len, sdev->bus);
-#endif
 }
 
 void sbus_unmap_single(struct sbus_dev *sdev, dma_addr_t ba, size_t n, int \
direction)  {
-#if 0 /* This is the version that abuses consistent space */
-	struct resource *res;
-	unsigned long va;
-
-	if ((res = _sparc_find_resource(&_sparc_dvma, ba)) == NULL) {
-		printk("sbus_unmap_single: cannot find %08x\n", (unsigned)ba);
-		return;
-	}
-
-	n = (n + PAGE_SIZE-1) & PAGE_MASK;
-	if ((res->end-res->start)+1 != n) {
-		printk("sbus_unmap_single: region 0x%lx asked 0x%lx\n",
-		    (long)((res->end-res->start)+1), n);
-		return;
-	}
-
-	va = (unsigned long) res->name;	/* XXX Ouch */
-	mmu_inval_dma_area(va, n);	/* in all contexts, mm's?... */
-	mmu_unmap_dma_area(ba, n);	/* iounit cache flush is here */
-	release_resource(res);
-	kfree(res);
-#endif
-#if 1 /* "trampoline" version */
 	mmu_release_scsi_one(ba, n, sdev->bus);
-#endif
 }
 
 int sbus_map_sg(struct sbus_dev *sdev, struct scatterlist *sg, int n, int direction)
@@ -456,7 +372,7 @@
 	if (res == NULL)
 		panic("sbus_dma_sync_single: 0x%x\n", ba);
 
-	va = (unsigned long) phys_to_virt(mmu_translate_dvma(ba));
+	va = page_address(mmu_translate_dvma(ba)); /* XXX highmem */
 	/*
 	 * XXX This bogosity will be fixed with the iommu rewrite coming soon
 	 * to a kernel near you. - Anton
@@ -511,24 +427,12 @@
 		kfree(res);
 		return NULL;
 	}
-
 	mmu_inval_dma_area(va, len_total);
-
 #if 0
-/* P3 */ printk("pci_alloc_consistent: kva %lx uncva %lx phys %lx size %x\n",
+/* P3 */ printk("pci_alloc_consistent: kva %lx uncva %lx phys %lx size %lx\n",
   (long)va, (long)res->start, (long)virt_to_phys(va), len_total);
 #endif
-	{
-		unsigned long xva, xpa;
-		xva = res->start;
-		xpa = virt_to_phys(va);
-		while (len_total != 0) {
-			len_total -= PAGE_SIZE;
-			(*_sparc_mapioaddr)(xpa, xva, 0, 0);
-			xva += PAGE_SIZE;
-			xpa += PAGE_SIZE;
-		}
-	}
+	sparc_mapiorange(0, virt_to_phys(va), res->start, len_total);
 
 	*pba = virt_to_phys(va); /* equals virt_to_bus (R.I.P.) for us. */
 	return (void *) res->start;
@@ -567,12 +471,7 @@
 
 	pgp = (unsigned long) phys_to_virt(ba);	/* bus_to_virt actually */
 	mmu_inval_dma_area(pgp, n);
-	{
-		int x;
-		for (x = 0; x < n; x += PAGE_SIZE) {
-			(*_sparc_unmapioaddr)((unsigned long)p + n);
-		}
-	}
+	sparc_unmapiorange((unsigned long)p, n);
 
 	release_resource(res);
 	kfree(res);
@@ -751,37 +650,6 @@
 	return NULL;
 }
 
-/*
- * Necessary boot time initializations.
- */
-
-void ioport_init(void)
-{
-	extern void sun4c_mapioaddr(unsigned long, unsigned long, int, int);
-	extern void srmmu_mapioaddr(unsigned long, unsigned long, int, int);
-	extern void sun4c_unmapioaddr(unsigned long);
-	extern void srmmu_unmapioaddr(unsigned long);
-
-	switch(sparc_cpu_model) {
-	case sun4c:
-	case sun4:
-	case sun4e:
-		_sparc_mapioaddr = sun4c_mapioaddr;
-		_sparc_unmapioaddr = sun4c_unmapioaddr;
-		break;
-	case sun4m:
-	case sun4d:
-		_sparc_mapioaddr = srmmu_mapioaddr;
-		_sparc_unmapioaddr = srmmu_unmapioaddr;
-		break;
-	default:
-		printk("ioport_init: cpu type %d is unknown.\n",
-		    sparc_cpu_model);
-		prom_halt();
-	};
-
-}
-
 void register_proc_sparc_ioport(void)
 {
 #ifdef CONFIG_PROC_FS
diff -urN -X dontdiff linux-2.5.68-bk5/arch/sparc/lib/bitext.c \
                linux-2.5.68-bk5-sparc/arch/sparc/lib/bitext.c
--- linux-2.5.68-bk5/arch/sparc/lib/bitext.c	1969-12-31 16:00:00.000000000 -0800
+++ linux-2.5.68-bk5-sparc/arch/sparc/lib/bitext.c	2003-04-26 22:57:01.000000000 \
-0700 @@ -0,0 +1,114 @@
+/*
+ * bitext.c: kernel little helper (of bit shuffling variety).
+ *
+ * Copyright (C) 2002 Pete Zaitcev <zaitcev@yahoo.com>
+ *
+ * The algorithm to search a zero bit string is geared towards its application.
+ * We expect a couple of fixed sizes of requests, so a rotating counter, reset
+ * by align size, should provide fast enough search while maintaining low
+ * fragmentation.
+ */
+
+#include <linux/smp_lock.h>
+
+#include <asm/bitext.h>
+#include <asm/bitops.h>
+
+/**
+ * bit_map_string_get - find and set a bit string in bit map.
+ * @t: the bit map.
+ * @len: requested string length
+ * @align: requested alignment
+ *
+ * Returns offset in the map or -1 if out of space.
+ *
+ * Not safe to call from an interrupt (uses spin_lock).
+ */
+int bit_map_string_get(struct bit_map *t, int len, int align)
+{
+	int offset, count;	/* siamese twins */
+	int off_new;
+	int align1;
+	int i;
+
+	if (align == 0)
+		align = 1;
+	align1 = align - 1;
+	if ((align & align1) != 0)
+		BUG();
+	if (align < 0 || align >= t->size)
+		BUG();
+	if (len <= 0 || len > t->size)
+		BUG();
+
+	spin_lock(&t->lock);
+	offset = t->last_off & ~align1;
+	count = 0;
+	for (;;) {
+		off_new = find_next_zero_bit(t->map, t->size, offset);
+		off_new = (off_new + align1) & ~align1;
+		count += off_new - offset;
+		offset = off_new;
+		if (offset >= t->size)
+			offset = 0;
+		if (count + len > t->size) {
+			spin_unlock(&t->lock);
+/* P3 */ printk(KERN_ERR
+  "bitmap out: size %d used %d off %d len %d align %d count %d\n",
+  t->size, t->used, offset, len, align, count);
+			return -1;
+		}
+
+		if (offset + len > t->size) {
+			count += t->size - offset;
+			offset = 0;
+			continue;
+		}
+
+		i = 0;
+		while (test_bit(offset + i, t->map) == 0) {
+			i++;
+			if (i == len) {
+				for (i = 0; i < len; i++)
+					__set_bit(offset + i, t->map);
+				if ((t->last_off = offset + len) >= t->size)
+					t->last_off = 0;
+				t->used += len;
+				spin_unlock(&t->lock);
+				return offset;
+			}
+		}
+		count += i + 1;
+		if ((offset += i + 1) >= t->size)
+			offset = 0;
+	}
+}
+
+void bit_map_clear(struct bit_map *t, int offset, int len)
+{
+	int i;
+
+	spin_lock(&t->lock);
+	if (t->used < len)
+		BUG();		/* Underflow; must check under the lock. */
+	for (i = 0; i < len; i++) {
+		if (test_bit(offset + i, t->map) == 0)
+			BUG();
+		__clear_bit(offset + i, t->map);
+	}
+	t->used -= len;
+	spin_unlock(&t->lock);
+}
+
+void bit_map_init(struct bit_map *t, unsigned long *map, int size)
+{
+
+	if ((size & 07) != 0)
+		BUG();
+	memset(map, 0, size>>3);
+
+	memset(t, 0, sizeof *t);
+	spin_lock_init(&t->lock);
+	t->map = map;
+	t->size = size;
+}
diff -urN -X dontdiff linux-2.5.68-bk5/arch/sparc/lib/Makefile \
                linux-2.5.68-bk5-sparc/arch/sparc/lib/Makefile
--- linux-2.5.68-bk5/arch/sparc/lib/Makefile	2003-03-24 14:01:46.000000000 -0800
+++ linux-2.5.68-bk5-sparc/arch/sparc/lib/Makefile	2003-04-24 12:26:24.000000000 \
-0700 @@ -10,4 +10,4 @@
          strlen.o checksum.o blockops.o memscan.o memcmp.o strncmp.o \
 	 strncpy_from_user.o divdi3.o udivdi3.o strlen_user.o \
 	 copy_user.o locks.o atomic.o bitops.o debuglocks.o lshrdi3.o \
-	 ashldi3.o rwsem.o muldi3.o
+	 ashldi3.o rwsem.o muldi3.o bitext.o
diff -urN -X dontdiff linux-2.5.68-bk5/arch/sparc/mm/init.c \
                linux-2.5.68-bk5-sparc/arch/sparc/mm/init.c
--- linux-2.5.68-bk5/arch/sparc/mm/init.c	2003-04-24 12:14:19.000000000 -0700
+++ linux-2.5.68-bk5-sparc/arch/sparc/mm/init.c	2003-04-24 12:26:24.000000000 -0700
@@ -88,8 +88,6 @@
 #endif
 }
 
-extern pgprot_t protection_map[16];
-
 void __init sparc_context_init(int numctx)
 {
 	int ctx;
diff -urN -X dontdiff linux-2.5.68-bk5/arch/sparc/mm/iommu.c \
                linux-2.5.68-bk5-sparc/arch/sparc/mm/iommu.c
--- linux-2.5.68-bk5/arch/sparc/mm/iommu.c	2003-03-24 14:01:47.000000000 -0800
+++ linux-2.5.68-bk5-sparc/arch/sparc/mm/iommu.c	2003-04-29 10:36:25.000000000 -0700
@@ -23,6 +23,18 @@
 #include <asm/mbus.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
+#include <asm/bitext.h>
+#include <asm/iommu.h>
+
+/*
+ * This can be sized dynamically, but we will do this
+ * only when we have a guidance about actual I/O pressures.
+ */
+#define IOMMU_RNGE	IOMMU_RNGE_256MB
+#define IOMMU_START	0xF0000000
+#define IOMMU_WINSIZE	(256*1024*1024U)
+#define IOMMU_NPTES	(IOMMU_WINSIZE/PAGE_SIZE)	/* 64K PTEs, 256KB */
+#define IOMMU_ORDER	6				/* 4096 * (1<<6) */
 
 /* srmmu.c */
 extern int viking_mxcc_present;
@@ -34,34 +46,30 @@
 extern void viking_flush_page(unsigned long page);
 extern void viking_mxcc_flush_page(unsigned long page);
 
-#define IOPERM        (IOPTE_CACHE | IOPTE_WRITE | IOPTE_VALID)
-#define MKIOPTE(phys) (((((phys)>>4) & IOPTE_PAGE) | IOPERM) & ~IOPTE_WAZ)
-
-static inline void iommu_map_dvma_pages_for_iommu(struct iommu_struct *iommu)
-{
-	unsigned long kern_end = (unsigned long) high_memory;
-	unsigned long first = PAGE_OFFSET;
-	unsigned long last = kern_end;
-	iopte_t *iopte = iommu->page_table;
+/*
+ * Values precomputed according to CPU type.
+ */
+static unsigned int ioperm_noc;		/* Consistent mapping iopte flags */
+static pgprot_t dvma_prot;		/* Consistent mapping pte flags */
 
-	iopte += ((first - iommu->start) >> PAGE_SHIFT);
-	while(first <= last) {
-		*iopte++ = __iopte(MKIOPTE(__pa(first)));
-		first += PAGE_SIZE;
-	}
-}
+#define IOPERM        (IOPTE_CACHE | IOPTE_WRITE | IOPTE_VALID)
+#define MKIOPTE(pfn, perm) (((((pfn)<<8) & IOPTE_PAGE) | (perm)) & ~IOPTE_WAZ)
 
 void __init
 iommu_init(int iommund, struct sbus_bus *sbus)
 {
-	unsigned int impl, vers, ptsize;
+	unsigned int impl, vers;
 	unsigned long tmp;
 	struct iommu_struct *iommu;
 	struct linux_prom_registers iommu_promregs[PROMREG_MAX];
 	struct resource r;
-	int i;
+	unsigned long *bitmap;
 
 	iommu = kmalloc(sizeof(struct iommu_struct), GFP_ATOMIC);
+	if (!iommu) {
+		prom_printf("Unable to allocate iommu structure\n");
+		prom_halt();
+	}
 	prom_getproperty(iommund, "reg", (void *) iommu_promregs,
 			 sizeof(iommu_promregs));
 	memset(&r, 0, sizeof(r));
@@ -69,93 +77,129 @@
 	r.start = iommu_promregs[0].phys_addr;
 	iommu->regs = (struct iommu_regs *)
 		sbus_ioremap(&r, 0, PAGE_SIZE * 3, "iommu_regs");
-	if(!iommu->regs)
-		panic("Cannot map IOMMU registers.");
+	if(!iommu->regs) {
+		prom_printf("Cannot map IOMMU registers\n");
+		prom_halt();
+	}
 	impl = (iommu->regs->control & IOMMU_CTRL_IMPL) >> 28;
 	vers = (iommu->regs->control & IOMMU_CTRL_VERS) >> 24;
 	tmp = iommu->regs->control;
 	tmp &= ~(IOMMU_CTRL_RNGE);
-	switch(PAGE_OFFSET & 0xf0000000) {
-	case 0xf0000000:
-		tmp |= (IOMMU_RNGE_256MB | IOMMU_CTRL_ENAB);
-		iommu->plow = iommu->start = 0xf0000000;
-		break;
-	case 0xe0000000:
-		tmp |= (IOMMU_RNGE_512MB | IOMMU_CTRL_ENAB);
-		iommu->plow = iommu->start = 0xe0000000;
-		break;
-	case 0xd0000000:
-	case 0xc0000000:
-		tmp |= (IOMMU_RNGE_1GB | IOMMU_CTRL_ENAB);
-		iommu->plow = iommu->start = 0xc0000000;
-		break;
-	case 0xb0000000:
-	case 0xa0000000:
-	case 0x90000000:
-	case 0x80000000:
-		tmp |= (IOMMU_RNGE_2GB | IOMMU_CTRL_ENAB);
-		iommu->plow = iommu->start = 0x80000000;
-		break;
-	}
+	tmp |= (IOMMU_RNGE_256MB | IOMMU_CTRL_ENAB);
 	iommu->regs->control = tmp;
 	iommu_invalidate(iommu->regs);
+	iommu->start = IOMMU_START;
 	iommu->end = 0xffffffff;
 
 	/* Allocate IOMMU page table */
-	ptsize = iommu->end - iommu->start + 1;
-	ptsize = (ptsize >> PAGE_SHIFT) * sizeof(iopte_t);
-
 	/* Stupid alignment constraints give me a headache. 
 	   We need 256K or 512K or 1M or 2M area aligned to
            its size and current gfp will fortunately give
            it to us. */
-	for (i = 6; i < 9; i++)
-		if ((1 << (i + PAGE_SHIFT)) == ptsize)
-			break;
-        tmp = __get_free_pages(GFP_KERNEL, i);
+        tmp = __get_free_pages(GFP_KERNEL, IOMMU_ORDER);
 	if (!tmp) {
-		prom_printf("Could not allocate iopte of size 0x%08x\n", ptsize);
+		prom_printf("Unable to allocate iommu table [0x%08x]\n",
+			    IOMMU_NPTES*sizeof(iopte_t));
 		prom_halt();
 	}
-	iommu->lowest = iommu->page_table = (iopte_t *)tmp;
+	iommu->page_table = (iopte_t *)tmp;
 
 	/* Initialize new table. */
+	memset(iommu->page_table, 0, IOMMU_NPTES*sizeof(iopte_t));
 	flush_cache_all();
-	memset(iommu->page_table, 0, ptsize);
-	iommu_map_dvma_pages_for_iommu(iommu);
-	if(viking_mxcc_present) {
-		unsigned long start = (unsigned long) iommu->page_table;
-		unsigned long end = (start + ptsize);
+	flush_tlb_all();
+	iommu->regs->base = __pa((unsigned long) iommu->page_table) >> 4;
+	iommu_invalidate(iommu->regs);
+
+	bitmap = kmalloc(IOMMU_NPTES>>3, GFP_KERNEL);
+	if (!bitmap) {
+		prom_printf("Unable to allocate iommu bitmap [%d]\n",
+			    (int)(IOMMU_NPTES>>3));
+		prom_halt();
+	}
+	bit_map_init(&iommu->usemap, bitmap, IOMMU_NPTES);
+
+	printk("IOMMU: impl %d vers %d table 0x%p[%d B] map [%d b]\n",
+	    impl, vers, iommu->page_table,
+	    (int)(IOMMU_NPTES*sizeof(iopte_t)), (int)IOMMU_NPTES);
+
+	sbus->iommu = iommu;
+}
+
+/* This begs to be btfixup-ed by srmmu. */
+static void iommu_viking_flush_iotlb(iopte_t *iopte, unsigned int niopte)
+{
+	unsigned long start;
+	unsigned long end;
+
+	start = (unsigned long)iopte & PAGE_MASK;
+	end = PAGE_ALIGN(start + niopte*sizeof(iopte_t));
+	if (viking_mxcc_present) {
 		while(start < end) {
 			viking_mxcc_flush_page(start);
 			start += PAGE_SIZE;
 		}
 	} else if (viking_flush) {
-		unsigned long start = (unsigned long) iommu->page_table;
-		unsigned long end = (start + ptsize);
 		while(start < end) {
 			viking_flush_page(start);
 			start += PAGE_SIZE;
 		}
 	}
-	flush_tlb_all();
-	iommu->regs->base = __pa((unsigned long) iommu->page_table) >> 4;
-	iommu_invalidate(iommu->regs);
+}
 
-	sbus->iommu = iommu;
-	printk("IOMMU: impl %d vers %d page table at %p of size %d bytes\n",
-	       impl, vers, iommu->page_table, ptsize);
+static u32 iommu_get_one(struct page *page, int npages, struct sbus_bus *sbus)
+{
+	struct iommu_struct *iommu = sbus->iommu;
+	int ioptex;
+	iopte_t *iopte, *iopte0;
+	unsigned int busa, busa0;
+	int i;
+
+	ioptex = bit_map_string_get(&iommu->usemap, npages, 1);
+	if (ioptex < 0)
+		panic("iommu out");
+	busa0 = iommu->start + (ioptex << PAGE_SHIFT);
+	iopte0 = &iommu->page_table[ioptex];
+
+	busa = busa0;
+	iopte = iopte0;
+	for (i = 0; i < npages; i++) {
+		iopte_val(*iopte) = MKIOPTE(page_to_pfn(page), IOPERM);
+		iommu_invalidate_page(iommu->regs, busa);
+		busa += PAGE_SIZE;
+		iopte++;
+		page++;
+	}
+
+	iommu_viking_flush_iotlb(iopte0, npages);
+
+	return busa0;
+}
+
+static u32 iommu_get_scsi_one(char *vaddr, unsigned int len,
+    struct sbus_bus *sbus)
+{
+	unsigned long off;
+	int npages;
+	struct page *page;
+	u32 busa;
+
+	off = (unsigned long)vaddr & ~PAGE_MASK;
+	npages = (off + len + PAGE_SIZE-1) >> PAGE_SHIFT;
+	page = virt_to_page((unsigned long)vaddr & PAGE_MASK);
+	busa = iommu_get_one(page, npages, sbus);
+	return busa + off;
 }
 
 static __u32 iommu_get_scsi_one_noflush(char *vaddr, unsigned long len, struct \
sbus_bus *sbus)  {
-	return (__u32)vaddr;
+	return iommu_get_scsi_one(vaddr, len, sbus);
 }
 
 static __u32 iommu_get_scsi_one_gflush(char *vaddr, unsigned long len, struct \
sbus_bus *sbus)  {
 	flush_page_for_dma(0);
-	return (__u32)vaddr;
+	return iommu_get_scsi_one(vaddr, len, sbus);
 }
 
 static __u32 iommu_get_scsi_one_pflush(char *vaddr, unsigned long len, struct \
sbus_bus *sbus) @@ -166,75 +210,129 @@
 		flush_page_for_dma(page);
 		page += PAGE_SIZE;
 	}
-	return (__u32)vaddr;
+	return iommu_get_scsi_one(vaddr, len, sbus);
 }
 
 static void iommu_get_scsi_sgl_noflush(struct scatterlist *sg, int sz, struct \
sbus_bus *sbus)  {
+	int n;
+
 	while (sz != 0) {
 		--sz;
-		sg[sz].dvma_address = (__u32) (page_address(sg[sz].page) + sg[sz].offset);
-		sg[sz].dvma_length = (__u32) (sg[sz].length);
+		n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
+		sg->dvma_address = iommu_get_one(sg->page, n, sbus) + sg->offset;
+		sg->dvma_length = (__u32) sg->length;
+		sg++;
 	}
 }
 
 static void iommu_get_scsi_sgl_gflush(struct scatterlist *sg, int sz, struct \
sbus_bus *sbus)  {
+	int n;
+
 	flush_page_for_dma(0);
 	while (sz != 0) {
 		--sz;
-		sg[sz].dvma_address = (__u32) (page_address(sg[sz].page) + sg[sz].offset);
-		sg[sz].dvma_length = (__u32) (sg[sz].length);
+		n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
+		sg->dvma_address = iommu_get_one(sg->page, n, sbus) + sg->offset;
+		sg->dvma_length = (__u32) sg->length;
+		sg++;
 	}
 }
 
 static void iommu_get_scsi_sgl_pflush(struct scatterlist *sg, int sz, struct \
sbus_bus *sbus)  {
 	unsigned long page, oldpage = 0;
+	int n, i;
 
 	while(sz != 0) {
 		--sz;
-		page = (unsigned long) page_address(sg[sz].page);
-		if (oldpage == page)
-			page += PAGE_SIZE; /* We flushed that page already */
-		while(page < (unsigned long)(page_address(sg[sz].page) + sg[sz].offset + \
                sg[sz].length)) {
-			flush_page_for_dma(page);
-			page += PAGE_SIZE;
+
+		n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
+
+		/*
+		 * We expect unmapped highmem pages to be not in the cache.
+		 * XXX Is this a good assumption?
+		 * XXX What if someone else unmaps it here and races us?
+		 */
+		if ((page = (unsigned long) page_address(sg->page)) != 0) {
+			for (i = 0; i < n; i++) {
+				if (page != oldpage) {	/* Already flushed? */
+					flush_page_for_dma(page);
+					oldpage = page;
+				}
+				page += PAGE_SIZE;
+			}
 		}
-		sg[sz].dvma_address = (__u32) (page_address(sg[sz].page) + sg[sz].offset);
-		sg[sz].dvma_length = (__u32) (sg[sz].length);
-		oldpage = page - PAGE_SIZE;
+
+		sg->dvma_address = iommu_get_one(sg->page, n, sbus) + sg->offset;
+		sg->dvma_length = (__u32) sg->length;
+		sg++;
+	}
+}
+
+static void iommu_release_one(u32 busa, int npages, struct sbus_bus *sbus)
+{
+	struct iommu_struct *iommu = sbus->iommu;
+	int ioptex;
+	int i;
+
+	if (busa < iommu->start)
+		BUG();
+	ioptex = (busa - iommu->start) >> PAGE_SHIFT;
+	for (i = 0; i < npages; i++) {
+		iopte_val(iommu->page_table[ioptex + i]) = 0;
+		iommu_invalidate_page(iommu->regs, busa);
+		busa += PAGE_SIZE;
 	}
+	bit_map_clear(&iommu->usemap, ioptex, npages);
 }
 
 static void iommu_release_scsi_one(__u32 vaddr, unsigned long len, struct sbus_bus \
*sbus)  {
+	unsigned long off;
+	int npages;
+
+	off = vaddr & ~PAGE_MASK;
+	npages = (off + len + PAGE_SIZE-1) >> PAGE_SHIFT;
+	iommu_release_one(vaddr & PAGE_MASK, npages, sbus);
 }
 
 static void iommu_release_scsi_sgl(struct scatterlist *sg, int sz, struct sbus_bus \
*sbus)  {
+	int n;
+
+	while(sz != 0) {
+		--sz;
+
+		n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
+		iommu_release_one(sg->dvma_address & PAGE_MASK, n, sbus);
+		sg->dvma_address = 0x21212121;
+		sg++;
+	}
 }
 
 #ifdef CONFIG_SBUS
-static void iommu_map_dma_area(unsigned long va, __u32 addr, int len)
+static int iommu_map_dma_area(dma_addr_t *pba, unsigned long va,
+    unsigned long addr, int len)
 {
-	unsigned long page, end, ipte_cache;
-	pgprot_t dvma_prot;
+	unsigned long page, end;
 	struct iommu_struct *iommu = sbus_root->iommu;
 	iopte_t *iopte = iommu->page_table;
 	iopte_t *first;
+	int ioptex;
 
-	if(viking_mxcc_present || srmmu_modtype == HyperSparc) {
-		dvma_prot = __pgprot(SRMMU_CACHE | SRMMU_ET_PTE | SRMMU_PRIV);
-		ipte_cache = 1;
-	} else {
-		dvma_prot = __pgprot(SRMMU_ET_PTE | SRMMU_PRIV);
-		ipte_cache = 0;
-	}
+	if ((va & ~PAGE_MASK) != 0) BUG();
+	if ((addr & ~PAGE_MASK) != 0) BUG();
+	if ((len & ~PAGE_MASK) != 0) BUG();
+
+	ioptex = bit_map_string_get(&iommu->usemap, len >> PAGE_SHIFT, 1);
+	if (ioptex < 0)
+		panic("iommu out");
 
-	iopte += ((addr - iommu->start) >> PAGE_SHIFT);
+	iopte += ioptex;
 	first = iopte;
-	end = PAGE_ALIGN((addr + len));
+	end = addr + len;
 	while(addr < end) {
 		page = va;
 		{
@@ -252,16 +350,11 @@
 			pgdp = pgd_offset(&init_mm, addr);
 			pmdp = pmd_offset(pgdp, addr);
 			ptep = pte_offset_map(pmdp, addr);
-			/* XXX What if we run out of atomic maps above */
 
 			set_pte(ptep, mk_pte(virt_to_page(page), dvma_prot));
-			if (ipte_cache != 0) {
-				iopte_val(*iopte++) = MKIOPTE(__pa(page));
-			} else {
-				iopte_val(*iopte++) =
-					MKIOPTE(__pa(page)) & ~IOPTE_CACHE;
-			}
 		}
+		iopte_val(*iopte++) =
+		    MKIOPTE(page_to_pfn(virt_to_page(page)), ioperm_noc);
 		addr += PAGE_SIZE;
 		va += PAGE_SIZE;
 	}
@@ -277,23 +370,12 @@
 	 *        to handle the latter case as well.
 	 */
 	flush_cache_all();
-	if(viking_mxcc_present) {
-		unsigned long start = ((unsigned long) first) & PAGE_MASK;
-		unsigned long end = PAGE_ALIGN(((unsigned long) iopte));
-		while(start < end) {
-			viking_mxcc_flush_page(start);
-			start += PAGE_SIZE;
-		}
-	} else if(viking_flush) {
-		unsigned long start = ((unsigned long) first) & PAGE_MASK;
-		unsigned long end = PAGE_ALIGN(((unsigned long) iopte));
-		while(start < end) {
-			viking_flush_page(start);
-			start += PAGE_SIZE;
-		}
-	}
+	iommu_viking_flush_iotlb(first, len >> PAGE_SHIFT);
 	flush_tlb_all();
 	iommu_invalidate(iommu->regs);
+
+	*pba = iommu->start + (ioptex << PAGE_SHIFT);
+	return 0;
 }
 
 static void iommu_unmap_dma_area(unsigned long busa, int len)
@@ -301,27 +383,29 @@
 	struct iommu_struct *iommu = sbus_root->iommu;
 	iopte_t *iopte = iommu->page_table;
 	unsigned long end;
+	int ioptex = (busa - iommu->start) >> PAGE_SHIFT;
 
-	iopte += ((busa - iommu->start) >> PAGE_SHIFT);
-	end = PAGE_ALIGN((busa + len));
+	if ((busa & ~PAGE_MASK) != 0) BUG();
+	if ((len & ~PAGE_MASK) != 0) BUG();
+
+	iopte += ioptex;
+	end = busa + len;
 	while (busa < end) {
 		iopte_val(*iopte++) = 0;
 		busa += PAGE_SIZE;
 	}
-	flush_tlb_all();	/* P3: Hmm... it would not hurt. */
+	flush_tlb_all();
 	iommu_invalidate(iommu->regs);
+	bit_map_clear(&iommu->usemap, ioptex, len >> PAGE_SHIFT);
 }
 
-static unsigned long iommu_translate_dvma(unsigned long busa)
+static struct page *iommu_translate_dvma(unsigned long busa)
 {
 	struct iommu_struct *iommu = sbus_root->iommu;
 	iopte_t *iopte = iommu->page_table;
-	unsigned long pa;
 
 	iopte += ((busa - iommu->start) >> PAGE_SHIFT);
-	pa = pte_val(*iopte);
-	pa = (pa & 0xFFFFFFF0) << 4;		/* Loose higher bits of 36 */
-	return pa + PAGE_OFFSET;
+	return pfn_to_page((pte_val(*iopte) & IOPTE_PAGE) >> (PAGE_SHIFT-4));
 }
 #endif
 
@@ -352,12 +436,20 @@
 		BTFIXUPSET_CALL(mmu_get_scsi_one, iommu_get_scsi_one_pflush, BTFIXUPCALL_NORM);
 		BTFIXUPSET_CALL(mmu_get_scsi_sgl, iommu_get_scsi_sgl_pflush, BTFIXUPCALL_NORM);
 	}
-	BTFIXUPSET_CALL(mmu_release_scsi_one, iommu_release_scsi_one, BTFIXUPCALL_NOP);
-	BTFIXUPSET_CALL(mmu_release_scsi_sgl, iommu_release_scsi_sgl, BTFIXUPCALL_NOP);
+	BTFIXUPSET_CALL(mmu_release_scsi_one, iommu_release_scsi_one, BTFIXUPCALL_NORM);
+	BTFIXUPSET_CALL(mmu_release_scsi_sgl, iommu_release_scsi_sgl, BTFIXUPCALL_NORM);
 
 #ifdef CONFIG_SBUS
 	BTFIXUPSET_CALL(mmu_map_dma_area, iommu_map_dma_area, BTFIXUPCALL_NORM);
 	BTFIXUPSET_CALL(mmu_unmap_dma_area, iommu_unmap_dma_area, BTFIXUPCALL_NORM);
 	BTFIXUPSET_CALL(mmu_translate_dvma, iommu_translate_dvma, BTFIXUPCALL_NORM);
 #endif
+
+	if (viking_mxcc_present || srmmu_modtype == HyperSparc) {
+		dvma_prot = __pgprot(SRMMU_CACHE | SRMMU_ET_PTE | SRMMU_PRIV);
+		ioperm_noc = IOPTE_CACHE | IOPTE_WRITE | IOPTE_VALID;
+	} else {
+		dvma_prot = __pgprot(SRMMU_ET_PTE | SRMMU_PRIV);
+		ioperm_noc = IOPTE_WRITE | IOPTE_VALID;
+	}
 }
diff -urN -X dontdiff linux-2.5.68-bk5/arch/sparc/mm/io-unit.c \
                linux-2.5.68-bk5-sparc/arch/sparc/mm/io-unit.c
--- linux-2.5.68-bk5/arch/sparc/mm/io-unit.c	2003-03-24 14:00:21.000000000 -0800
+++ linux-2.5.68-bk5-sparc/arch/sparc/mm/io-unit.c	2003-04-24 12:26:24.000000000 \
-0700 @@ -176,13 +176,15 @@
 }
 
 #ifdef CONFIG_SBUS
-static void iounit_map_dma_area(unsigned long va, __u32 addr, int len)
+static int iounit_map_dma_area(dma_addr_t *pba, unsigned long va, __u32 addr, int \
len)  {
 	unsigned long page, end;
 	pgprot_t dvma_prot;
 	iopte_t *iopte;
 	struct sbus_bus *sbus;
 
+	*pba = addr;
+
 	dvma_prot = __pgprot(SRMMU_CACHE | SRMMU_ET_PTE | SRMMU_PRIV);
 	end = PAGE_ALIGN((addr + len));
 	while(addr < end) {
@@ -213,6 +215,8 @@
 	}
 	flush_cache_all();
 	flush_tlb_all();
+
+	return 0;
 }
 
 static void iounit_unmap_dma_area(unsigned long addr, int len)
@@ -221,7 +225,7 @@
 }
 
 /* XXX We do not pass sbus device here, bad. */
-static unsigned long iounit_translate_dvma(unsigned long addr)
+static struct page *iounit_translate_dvma(unsigned long addr)
 {
 	struct sbus_bus *sbus = sbus_root;	/* They are all the same */
 	struct iounit_struct *iounit = (struct iounit_struct *)sbus->iommu;
@@ -230,7 +234,7 @@
 
 	i = ((addr - IOUNIT_DMA_BASE) >> PAGE_SHIFT);
 	iopte = (iopte_t *)(iounit->page_table + i);
-	return (iopte_val(*iopte) & 0xFFFFFFF0) << 4; /* XXX sun4d guru, help */
+	return pfn_to_page(iopte_val(*iopte) >> (PAGE_SHIFT-4)); /* XXX sun4d guru, help */
 }
 #endif
 
diff -urN -X dontdiff linux-2.5.68-bk5/arch/sparc/mm/loadmmu.c \
                linux-2.5.68-bk5-sparc/arch/sparc/mm/loadmmu.c
--- linux-2.5.68-bk5/arch/sparc/mm/loadmmu.c	2003-03-24 14:00:50.000000000 -0800
+++ linux-2.5.68-bk5-sparc/arch/sparc/mm/loadmmu.c	2003-04-28 22:19:23.000000000 \
-0700 @@ -26,7 +26,6 @@
 
 extern void ld_mmu_sun4c(void);
 extern void ld_mmu_srmmu(void);
-extern void ioport_init(void);
 
 void __init load_mmu(void)
 {
@@ -44,5 +43,4 @@
 		prom_halt();
 	}
 	btfixup();
-	ioport_init();
 }
diff -urN -X dontdiff linux-2.5.68-bk5/arch/sparc/mm/srmmu.c \
                linux-2.5.68-bk5-sparc/arch/sparc/mm/srmmu.c
--- linux-2.5.68-bk5/arch/sparc/mm/srmmu.c	2003-04-19 21:54:56.000000000 -0700
+++ linux-2.5.68-bk5-sparc/arch/sparc/mm/srmmu.c	2003-04-29 10:35:14.000000000 -0700
@@ -21,6 +21,7 @@
 #include <linux/fs.h>
 #include <linux/seq_file.h>
 
+#include <asm/bitext.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
@@ -137,29 +138,26 @@
 int srmmu_cache_pagetables;
 
 /* these will be initialized in srmmu_nocache_calcsize() */
-int srmmu_nocache_npages;
 unsigned long srmmu_nocache_size;
 unsigned long srmmu_nocache_end;
 unsigned long pkmap_base;
 unsigned long pkmap_base_end;
-unsigned long srmmu_nocache_bitmap_size;
 extern unsigned long fix_kmap_begin;
 extern unsigned long fix_kmap_end;
 
+/* 1 bit <=> 256 bytes of nocache <=> 64 PTEs */
 #define SRMMU_NOCACHE_BITMAP_SHIFT (PAGE_SHIFT - 4)
 
 void *srmmu_nocache_pool;
 void *srmmu_nocache_bitmap;
-int srmmu_nocache_low;
-int srmmu_nocache_used;
-static spinlock_t srmmu_nocache_spinlock = SPIN_LOCK_UNLOCKED;
+static struct bit_map srmmu_nocache_map;
 
 /* This makes sense. Honest it does - Anton */
 #define __nocache_pa(VADDR) (((unsigned long)VADDR) - SRMMU_NOCACHE_VADDR + \
__pa((unsigned long)srmmu_nocache_pool))  #define __nocache_va(PADDR) (__va((unsigned \
long)PADDR) - (unsigned long)srmmu_nocache_pool + SRMMU_NOCACHE_VADDR)  #define \
__nocache_fix(VADDR) __va(__nocache_pa(VADDR))  
-static inline unsigned long srmmu_pte_pfn(pte_t pte)
+static unsigned long srmmu_pte_pfn(pte_t pte)
 {
 	if (srmmu_device_memory(pte_val(pte))) {
 		/* XXX Anton obviously had something in mind when he did this.
@@ -219,15 +217,6 @@
 static inline void srmmu_pgd_clear(pgd_t * pgdp)
 { srmmu_set_pte((pte_t *)pgdp, __pte(0)); }
 
-static inline int srmmu_pte_write(pte_t pte)
-{ return pte_val(pte) & SRMMU_WRITE; }
-
-static inline int srmmu_pte_dirty(pte_t pte)
-{ return pte_val(pte) & SRMMU_DIRTY; }
-
-static inline int srmmu_pte_young(pte_t pte)
-{ return pte_val(pte) & SRMMU_REF; }
-
 static inline pte_t srmmu_pte_wrprotect(pte_t pte)
 { return __pte(pte_val(pte) & ~SRMMU_WRITE);}
 
@@ -321,10 +310,7 @@
  */
 static unsigned long __srmmu_get_nocache(int size, int align)
 {
-	int offset = srmmu_nocache_low;
-	int i;
-	unsigned long va_tmp, phys_tmp;
-	int lowest_failed = 0;
+	int offset;
 
 	if (size < SRMMU_NOCACHE_BITMAP_SHIFT) {
 		printk("Size 0x%x too small for nocache request\n", size);
@@ -334,49 +320,20 @@
 		printk("Size 0x%x unaligned int nocache request\n", size);
 		size += SRMMU_NOCACHE_BITMAP_SHIFT-1;
 	}
-	size = size >> SRMMU_NOCACHE_BITMAP_SHIFT;
-
-	spin_lock(&srmmu_nocache_spinlock);
-
-repeat:
-	offset = find_next_zero_bit(srmmu_nocache_bitmap, srmmu_nocache_bitmap_size, offset);
 
-	/* we align on physical address */
-	if (align) {
-		va_tmp = (SRMMU_NOCACHE_VADDR + (offset << SRMMU_NOCACHE_BITMAP_SHIFT));
-		phys_tmp = (__nocache_pa(va_tmp) + align - 1) & ~(align - 1);
-		va_tmp = (unsigned long)__nocache_va(phys_tmp);
-		offset = (va_tmp - SRMMU_NOCACHE_VADDR) >> SRMMU_NOCACHE_BITMAP_SHIFT;
-	}
-
-	if ((srmmu_nocache_bitmap_size - offset) < size) {
-		printk("Run out of nocached RAM!\n");
-		spin_unlock(&srmmu_nocache_spinlock);
+	offset = bit_map_string_get(&srmmu_nocache_map,
+		       			size >> SRMMU_NOCACHE_BITMAP_SHIFT,
+					align >> SRMMU_NOCACHE_BITMAP_SHIFT);
+/* P3 */ /* printk("srmmu: get size %d align %d, got %d (0x%x)\n",
+   size >> SRMMU_NOCACHE_BITMAP_SHIFT, align >> SRMMU_NOCACHE_BITMAP_SHIFT,
+   offset, offset); */
+	if (offset == -1) {
+		printk("srmmu: out of nocache %d: %d/%d\n",
+		    size, (int) srmmu_nocache_size,
+		    srmmu_nocache_map.used << SRMMU_NOCACHE_BITMAP_SHIFT);
 		return 0;
 	}
 
-	i = 0;
-	while(i < size) {
-		if (test_bit(offset + i, srmmu_nocache_bitmap)) {
-			lowest_failed = 1;
-			offset = offset + i + 1;
-			goto repeat;
-		}
-		i++;
-	}
-
-	i = 0;
-	while(i < size) {
-		set_bit(offset + i, srmmu_nocache_bitmap);
-		i++;
-		srmmu_nocache_used++;
-	}
-
-	if (!lowest_failed && ((align >> SRMMU_NOCACHE_BITMAP_SHIFT) <= 1) && (offset > srmmu_nocache_low))
-		srmmu_nocache_low = offset;
-
-	spin_unlock(&srmmu_nocache_spinlock);
-
 	return (SRMMU_NOCACHE_VADDR + (offset << SRMMU_NOCACHE_BITMAP_SHIFT));
 }
 
@@ -422,63 +379,57 @@
 	offset = (vaddr - SRMMU_NOCACHE_VADDR) >> SRMMU_NOCACHE_BITMAP_SHIFT;
 	size = size >> SRMMU_NOCACHE_BITMAP_SHIFT;
 
-	spin_lock(&srmmu_nocache_spinlock);
-
-	while(size--) {
-		clear_bit(offset + size, srmmu_nocache_bitmap);
-		srmmu_nocache_used--;
-	}
-
-	if (offset < srmmu_nocache_low)
-		srmmu_nocache_low = offset;
-
-	spin_unlock(&srmmu_nocache_spinlock);
+/* P3 */ /* printk("srmmu: free off %d (0x%x) size %d\n", offset, offset, size); */
+	bit_map_clear(&srmmu_nocache_map, offset, size);
 }
 
 void srmmu_early_allocate_ptable_skeleton(unsigned long start, unsigned long end);
 
 extern unsigned long probe_memory(void);	/* in fault.c */
 
-/* Reserve nocache dynamically proportionally to the amount of
+/*
+ * Reserve nocache dynamically proportionally to the amount of
  * system RAM. -- Tomas Szepe <szepe@pinerecords.com>, June 2002
  */
 void srmmu_nocache_calcsize(void)
 {
 	unsigned long sysmemavail = probe_memory() / 1024;
+	int srmmu_nocache_npages;
 
 	srmmu_nocache_npages =
 		sysmemavail / SRMMU_NOCACHE_ALCRATIO / 1024 * 256;
-	if (sysmemavail % (SRMMU_NOCACHE_ALCRATIO * 1024))
-		srmmu_nocache_npages += 256;
+
+ /* P3 XXX The 4x overuse: corroborated by /proc/meminfo. */
+	// if (srmmu_nocache_npages < 256) srmmu_nocache_npages = 256;
+	if (srmmu_nocache_npages < 550) srmmu_nocache_npages = 550;
 
 	/* anything above 1280 blows up */
 	if (srmmu_nocache_npages > 1280) srmmu_nocache_npages = 1280;
 
 	srmmu_nocache_size = srmmu_nocache_npages * PAGE_SIZE;
-	srmmu_nocache_bitmap_size = srmmu_nocache_npages * 16;
 	srmmu_nocache_end = SRMMU_NOCACHE_VADDR + srmmu_nocache_size;
 	fix_kmap_begin = srmmu_nocache_end;
 	fix_kmap_end = fix_kmap_begin + (KM_TYPE_NR * NR_CPUS - 1) * PAGE_SIZE;
 	pkmap_base = SRMMU_NOCACHE_VADDR + srmmu_nocache_size + 0x40000;
 	pkmap_base_end = pkmap_base + LAST_PKMAP * PAGE_SIZE;
-
-	/* printk("system memory available = %luk\nnocache ram size = %luk\n",
-		sysmemavail, srmmu_nocache_size / 1024); */
 }
 
 void srmmu_nocache_init(void)
 {
+	unsigned int bitmap_bits;
 	pgd_t *pgd;
 	pmd_t *pmd;
 	pte_t *pte;
 	unsigned long paddr, vaddr;
 	unsigned long pteval;
 
+	bitmap_bits = srmmu_nocache_size >> SRMMU_NOCACHE_BITMAP_SHIFT;
+
 	srmmu_nocache_pool = __alloc_bootmem(srmmu_nocache_size, PAGE_SIZE, 0UL);
 	memset(srmmu_nocache_pool, 0, srmmu_nocache_size);
 
-	srmmu_nocache_bitmap = __alloc_bootmem(srmmu_nocache_bitmap_size, SMP_CACHE_BYTES, 0UL);
-	memset(srmmu_nocache_bitmap, 0, srmmu_nocache_bitmap_size);
+	srmmu_nocache_bitmap = __alloc_bootmem(bitmap_bits >> 3, SMP_CACHE_BYTES, 0UL);
+	bit_map_init(&srmmu_nocache_map, srmmu_nocache_bitmap, bitmap_bits);
 
 	srmmu_swapper_pg_dir = (pgd_t *)__srmmu_get_nocache(SRMMU_PGD_TABLE_SIZE, SRMMU_PGD_TABLE_SIZE);
 	memset(__nocache_fix(srmmu_swapper_pg_dir), 0, SRMMU_PGD_TABLE_SIZE);
@@ -486,11 +437,12 @@
 
 	srmmu_early_allocate_ptable_skeleton(SRMMU_NOCACHE_VADDR, srmmu_nocache_end);
 
-	spin_lock_init(&srmmu_nocache_spinlock);
-
 	paddr = __pa((unsigned long)srmmu_nocache_pool);
 	vaddr = SRMMU_NOCACHE_VADDR;
 
+/* P3 */ printk("srmmu: pool 0x%x vaddr 0x%x bitmap 0x%x bits %d (0x%x)\n",
+  (int)srmmu_nocache_pool, vaddr, srmmu_nocache_bitmap, bitmap_bits, bitmap_bits);
+
 	while (vaddr < srmmu_nocache_end) {
 		pgd = pgd_offset_k(vaddr);
 		pmd = srmmu_pmd_offset(__nocache_fix(pgd), vaddr);
@@ -637,7 +589,8 @@
 }
 
 /* Low level IO area allocation on the SRMMU. */
-void srmmu_mapioaddr(unsigned long physaddr, unsigned long virt_addr, int bus_type, int rdonly)
+static inline void srmmu_mapioaddr(unsigned long physaddr,
+    unsigned long virt_addr, int bus_type)
 {
 	pgd_t *pgdp;
 	pmd_t *pmdp;
@@ -656,16 +609,24 @@
 	 * 36-bit physical address on the I/O space lines...
 	 */
 	tmp |= (bus_type << 28);
-	if(rdonly)
-		tmp |= SRMMU_PRIV_RDONLY;
-	else
-		tmp |= SRMMU_PRIV;
+	tmp |= SRMMU_PRIV;
 	__flush_page_to_ram(virt_addr);
 	srmmu_set_pte(ptep, __pte(tmp));
+}
+
+static void srmmu_mapiorange(unsigned int bus, unsigned long xpa,
+    unsigned long xva, unsigned int len)
+{
+	while (len != 0) {
+		len -= PAGE_SIZE;
+		srmmu_mapioaddr(xpa, xva, bus);
+		xva += PAGE_SIZE;
+		xpa += PAGE_SIZE;
+	}
 	flush_tlb_all();
 }
 
-void srmmu_unmapioaddr(unsigned long virt_addr)
+static inline void srmmu_unmapioaddr(unsigned long virt_addr)
 {
 	pgd_t *pgdp;
 	pmd_t *pmdp;
@@ -677,6 +638,15 @@
 
 	/* No need to flush uncacheable page. */
 	srmmu_pte_clear(ptep);
+}
+
+static void srmmu_unmapiorange(unsigned long virt_addr, unsigned int len)
+{
+	while (len != 0) {
+		len -= PAGE_SIZE;
+		srmmu_unmapioaddr(virt_addr);
+		virt_addr += PAGE_SIZE;
+	}
 	flush_tlb_all();
 }
 
@@ -1398,7 +1368,7 @@
 		   srmmu_name,
 		   num_contexts,
 		   srmmu_nocache_size,
-		   (srmmu_nocache_used << SRMMU_NOCACHE_BITMAP_SHIFT));
+		   srmmu_nocache_map.used << SRMMU_NOCACHE_BITMAP_SHIFT);
 }
 
 static void srmmu_update_mmu_cache(struct vm_area_struct * vma, unsigned long address, pte_t pte)
@@ -2258,7 +2228,10 @@
 	BTFIXUPSET_CALL(pte_mkyoung, srmmu_pte_mkyoung, BTFIXUPCALL_ORINT(SRMMU_REF));
 	BTFIXUPSET_CALL(update_mmu_cache, srmmu_update_mmu_cache, BTFIXUPCALL_NOP);
 	BTFIXUPSET_CALL(destroy_context, srmmu_destroy_context, BTFIXUPCALL_NORM);
-	
+
+	BTFIXUPSET_CALL(sparc_mapiorange, srmmu_mapiorange, BTFIXUPCALL_NORM);
+	BTFIXUPSET_CALL(sparc_unmapiorange, srmmu_unmapiorange, BTFIXUPCALL_NORM);
+
 	BTFIXUPSET_CALL(mmu_info, srmmu_mmu_info, BTFIXUPCALL_NORM);
 
 	BTFIXUPSET_CALL(alloc_thread_info, srmmu_alloc_thread_info, BTFIXUPCALL_NORM);
diff -urN -X dontdiff linux-2.5.68-bk5/arch/sparc/mm/sun4c.c \
                linux-2.5.68-bk5-sparc/arch/sparc/mm/sun4c.c
--- linux-2.5.68-bk5/arch/sparc/mm/sun4c.c	2003-04-24 12:14:19.000000000 -0700
+++ linux-2.5.68-bk5-sparc/arch/sparc/mm/sun4c.c	2003-04-28 22:01:52.000000000 -0700
@@ -534,10 +534,13 @@
 }
 
 /* Addr is always aligned on a page boundary for us already. */
-static void sun4c_map_dma_area(unsigned long va, u32 addr, int len)
+static int sun4c_map_dma_area(dma_addr_t *pba, unsigned long va,
+    unsigned long addr, int len)
 {
 	unsigned long page, end;
 
+	*pba = addr;
+
 	end = PAGE_ALIGN((addr + len));
 	while (addr < end) {
 		page = va;
@@ -550,13 +553,15 @@
 		addr += PAGE_SIZE;
 		va += PAGE_SIZE;
 	}
+
+	return 0;
 }
 
-static unsigned long sun4c_translate_dvma(unsigned long busa)
+static struct page *sun4c_translate_dvma(unsigned long busa)
 {
 	/* Fortunately for us, bus_addr == uncached_virt in sun4c. */
 	unsigned long pte = sun4c_get_pte(busa);
-	return (pte << PAGE_SHIFT) + PAGE_OFFSET;
+	return pfn_to_page(pte & SUN4C_PFN_MASK);
 }
 
 static void sun4c_unmap_dma_area(unsigned long busa, int len)
@@ -1578,21 +1583,33 @@
 	}
 }
 
-void sun4c_mapioaddr(unsigned long physaddr, unsigned long virt_addr,
-		     int bus_type, int rdonly)
+static inline void sun4c_mapioaddr(unsigned long physaddr, unsigned long virt_addr)
 {
 	unsigned long page_entry;
 
 	page_entry = ((physaddr >> PAGE_SHIFT) & SUN4C_PFN_MASK);
 	page_entry |= ((pg_iobits | _SUN4C_PAGE_PRIV) & ~(_SUN4C_PAGE_PRESENT));
-	if (rdonly)
-		page_entry &= ~_SUN4C_WRITEABLE;
 	sun4c_put_pte(virt_addr, page_entry);
 }
 
-void sun4c_unmapioaddr(unsigned long virt_addr)
+static void sun4c_mapiorange(unsigned int bus, unsigned long xpa,
+    unsigned long xva, unsigned int len)
 {
-	sun4c_put_pte(virt_addr, 0);
+	while (len != 0) {
+		len -= PAGE_SIZE;
+		sun4c_mapioaddr(xpa, xva);
+		xva += PAGE_SIZE;
+		xpa += PAGE_SIZE;
+	}
+}
+
+static void sun4c_unmapiorange(unsigned long virt_addr, unsigned int len)
+{
+	while (len != 0) {
+		len -= PAGE_SIZE;
+		sun4c_put_pte(virt_addr, 0);
+		virt_addr += PAGE_SIZE;
+	}
 }
 
 static void sun4c_alloc_context(struct mm_struct *old_mm, struct mm_struct *mm)
@@ -1783,7 +1800,7 @@
  */
 static pte_t sun4c_mk_pte(struct page *page, pgprot_t pgprot)
 {
-	return __pte((page - mem_map) | pgprot_val(pgprot));
+	return __pte(page_to_pfn(page) | pgprot_val(pgprot));
 }
 
 static pte_t sun4c_mk_pte_phys(unsigned long phys_page, pgprot_t pgprot)
@@ -2225,6 +2242,9 @@
 	BTFIXUPSET_CALL(mmu_unmap_dma_area, sun4c_unmap_dma_area, BTFIXUPCALL_NORM);
 	BTFIXUPSET_CALL(mmu_translate_dvma, sun4c_translate_dvma, BTFIXUPCALL_NORM);
 
+	BTFIXUPSET_CALL(sparc_mapiorange, sun4c_mapiorange, BTFIXUPCALL_NORM);
+	BTFIXUPSET_CALL(sparc_unmapiorange, sun4c_unmapiorange, BTFIXUPCALL_NORM);
+
 	BTFIXUPSET_CALL(alloc_thread_info, sun4c_alloc_thread_info, BTFIXUPCALL_NORM);
 	BTFIXUPSET_CALL(free_thread_info, sun4c_free_thread_info, BTFIXUPCALL_NORM);
 
diff -urN -X dontdiff linux-2.5.68-bk5/drivers/sbus/char/openprom.c \
                linux-2.5.68-bk5-sparc/drivers/sbus/char/openprom.c
--- linux-2.5.68-bk5/drivers/sbus/char/openprom.c	2003-03-24 14:01:12.000000000 -0800
+++ linux-2.5.68-bk5-sparc/drivers/sbus/char/openprom.c	2003-04-28 22:42:14.000000000 -0700
@@ -40,6 +40,7 @@
 #include <linux/string.h>
 #include <linux/miscdevice.h>
 #include <linux/init.h>
+#include <linux/fs.h>
 #include <asm/oplib.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
diff -urN -X dontdiff linux-2.5.68-bk5/include/asm-sparc/bitext.h \
                linux-2.5.68-bk5-sparc/include/asm-sparc/bitext.h
--- linux-2.5.68-bk5/include/asm-sparc/bitext.h	1969-12-31 16:00:00.000000000 -0800
+++ linux-2.5.68-bk5-sparc/include/asm-sparc/bitext.h	2003-04-24 12:26:24.000000000 -0700
@@ -0,0 +1,24 @@
+/*
+ * bitext.h: Bit string operations on the sparc, specific to architecture.
+ *
+ * Copyright 2002 Pete Zaitcev <zaitcev@yahoo.com>
+ */
+
+#ifndef _SPARC_BITEXT_H
+#define _SPARC_BITEXT_H
+
+#include <linux/smp_lock.h>
+
+struct bit_map {
+	spinlock_t lock;
+	unsigned long *map;
+	int size;
+	int used;
+	int last_off;
+};
+
+extern int bit_map_string_get(struct bit_map *t, int len, int align);
+extern void bit_map_clear(struct bit_map *t, int offset, int len);
+extern void bit_map_init(struct bit_map *t, unsigned long *map, int size);
+
+#endif /* defined(_SPARC_BITEXT_H) */
diff -urN -X dontdiff linux-2.5.68-bk5/include/asm-sparc/iommu.h \
                linux-2.5.68-bk5-sparc/include/asm-sparc/iommu.h
--- linux-2.5.68-bk5/include/asm-sparc/iommu.h	2003-03-24 14:00:03.000000000 -0800
+++ linux-2.5.68-bk5-sparc/include/asm-sparc/iommu.h	2003-04-29 01:17:33.000000000 -0700
@@ -6,6 +6,7 @@
 #define _SPARC_IOMMU_H
 
 #include <asm/page.h>
+#include <asm/bitext.h>
 
 /* The iommu handles all virtual to physical address translations
  * that occur between the SBUS and physical memory.  Access by
@@ -100,11 +101,11 @@
 struct iommu_struct {
 	struct iommu_regs *regs;
 	iopte_t *page_table;
-	iopte_t *lowest;     /* to speed up searches... */
-	unsigned long plow;
 	/* For convenience */
 	unsigned long start; /* First managed virtual address */
 	unsigned long end;   /* Last managed virtual address */
+
+	struct bit_map usemap;
 };
 
 extern __inline__ void iommu_invalidate(struct iommu_regs *regs)
@@ -112,9 +113,9 @@
 	regs->tlbflush = 0;
 }
 
-extern __inline__ void iommu_invalidate_page(struct iommu_regs *regs, unsigned long page)
+extern __inline__ void iommu_invalidate_page(struct iommu_regs *regs, unsigned long ba)
 {
-	regs->pageflush = (page & PAGE_MASK);
+	regs->pageflush = (ba & PAGE_MASK);
 }
 
 #endif /* !(_SPARC_IOMMU_H) */
diff -urN -X dontdiff linux-2.5.68-bk5/include/asm-sparc/pgtable.h \
                linux-2.5.68-bk5-sparc/include/asm-sparc/pgtable.h
--- linux-2.5.68-bk5/include/asm-sparc/pgtable.h	2003-04-19 21:55:21.000000000 -0700
+++ linux-2.5.68-bk5-sparc/include/asm-sparc/pgtable.h	2003-04-28 22:32:47.000000000 -0700
@@ -27,6 +27,7 @@
 #ifndef __ASSEMBLY__
 
 struct vm_area_struct;
+struct page;
 
 extern void load_mmu(void);
 extern unsigned long calc_highpages(void);
@@ -51,15 +52,30 @@
 
 /*
  * mmu_map/unmap are provided by iommu/iounit; Invalid to call on IIep.
+ *
+ * The mmu_map_dma_area establishes two mappings in one go.
+ * These mappings point to pages normally mapped at 'va' (linear address).
+ * First mapping is for CPU visible address at 'a', uncached.
+ * This is an alias, but it works because it is an uncached mapping.
+ * Second mapping is for device visible address, or "bus" address.
+ * The bus address is returned at '*pba'.
+ *
+ * These functions seem distinct, but are hard to split. On sun4c,
+ * at least for now, 'a' is equal to bus address, and retured in *pba.
+ * On sun4m, page attributes depend on the CPU type, so we have to
+ * know if we are mapping RAM or I/O, so it has to be an additional argument
+ * to a separate mapping function for CPU visible mappings.
  */
-BTFIXUPDEF_CALL(void,  mmu_map_dma_area, unsigned long va, __u32 addr, int len)
-BTFIXUPDEF_CALL(unsigned long /*phys*/, mmu_translate_dvma, unsigned long busa)
+BTFIXUPDEF_CALL(int,  mmu_map_dma_area, dma_addr_t *, unsigned long, unsigned long, int len)
+BTFIXUPDEF_CALL(struct page *, mmu_translate_dvma, unsigned long busa)
 BTFIXUPDEF_CALL(void,  mmu_unmap_dma_area, unsigned long busa, int len)
 
-#define mmu_map_dma_area(va, ba,len) BTFIXUP_CALL(mmu_map_dma_area)(va,ba,len)
+#define mmu_map_dma_area(pba,va,a,len) BTFIXUP_CALL(mmu_map_dma_area)(pba,va,a,len)
 #define mmu_unmap_dma_area(ba,len) BTFIXUP_CALL(mmu_unmap_dma_area)(ba,len)
 #define mmu_translate_dvma(ba)     BTFIXUP_CALL(mmu_translate_dvma)(ba)
 
+/*
+ */
 BTFIXUPDEF_SIMM13(pmd_shift)
 BTFIXUPDEF_SETHI(pmd_size)
 BTFIXUPDEF_SETHI(pmd_mask)
@@ -377,6 +393,12 @@
 
 #define update_mmu_cache(vma,addr,pte) BTFIXUP_CALL(update_mmu_cache)(vma,addr,pte)
 
+BTFIXUPDEF_CALL(void, sparc_mapiorange, unsigned int, unsigned long,
+    unsigned long, unsigned int)
+BTFIXUPDEF_CALL(void, sparc_unmapiorange, unsigned long, unsigned int)
+#define sparc_mapiorange(bus,pa,va,len) BTFIXUP_CALL(sparc_mapiorange)(bus,pa,va,len)
+#define sparc_unmapiorange(va,len) BTFIXUP_CALL(sparc_unmapiorange)(va,len)
+
 extern int invalid_segment;
 
 /* Encode and de-code a swap entry */
diff -urN -X dontdiff linux-2.5.68-bk5/include/asm-sparc/sbus.h \
                linux-2.5.68-bk5-sparc/include/asm-sparc/sbus.h
--- linux-2.5.68-bk5/include/asm-sparc/sbus.h	2003-03-24 14:00:08.000000000 -0800
+++ linux-2.5.68-bk5-sparc/include/asm-sparc/sbus.h	2003-04-29 01:40:33.000000000 -0700
@@ -10,7 +10,7 @@
 #include <linux/ioport.h>
 
 #include <asm/oplib.h>
-#include <asm/iommu.h>
+/* #include <asm/iommu.h> */ /* Unused since we use opaque iommu (|io-unit) */
 #include <asm/scatterlist.h>
 
 /* We scan which devices are on the SBus using the PROM node device
-
To unsubscribe from this list: send the line "unsubscribe sparclinux" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic