Linux 3.17.2

sparc64: Implement __get_user_pages_fast().
[ Upstream commit 06090e8ed8 ] It is not sufficient to only implement get_user_pages_fast(), you must also implement the atomic version __get_user_pages_fast() otherwise you end up using the weak symbol fallback implementation which simply returns zero. This is dangerous, because it causes the futex code to loop forever if transparent hugepages are supported (see get_futex_key()). Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2014-10-30 09:43:25 -07:00 · 2014-10-30 09:43:19 -07:00 · 2014-10-30 09:43:18 -07:00 · 2014-10-30 09:43:18 -07:00 · 2014-10-30 09:43:18 -07:00 · 2014-10-30 09:43:18 -07:00
232 changed files with 2810 additions and 1554 deletions
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@ -3522,6 +3522,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 					READ_DISC_INFO command);
 				e = NO_READ_CAPACITY_16 (don't use
 					READ_CAPACITY_16 command);
+				f = NO_REPORT_OPCODES (don't use report opcodes
+					command, uas only);
 				h = CAPACITY_HEURISTICS (decrease the
 					reported device capacity by one
 					sector if the number is odd);
@ -3541,6 +3543,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 					bogus residue values);
 				s = SINGLE_LUN (the device has only one
 					Logical Unit);
+				t = NO_ATA_1X (don't allow ATA(12) and ATA(16)
+					commands, uas only);
 				u = IGNORE_UAS (don't bind to the uas driver);
 				w = NO_WP_DETECT (don't test whether the
 					medium is write-protected).
--- a/Documentation/lzo.txt
+++ b/Documentation/lzo.txt
@ -0,0 +1,164 @@
+
+LZO stream format as understood by Linux's LZO decompressor
+===========================================================
+
+Introduction
+
+  This is not a specification. No specification seems to be publicly available
+  for the LZO stream format. This document describes what input format the LZO
+  decompressor as implemented in the Linux kernel understands. The file subject
+  of this analysis is lib/lzo/lzo1x_decompress_safe.c. No analysis was made on
+  the compressor nor on any other implementations though it seems likely that
+  the format matches the standard one. The purpose of this document is to
+  better understand what the code does in order to propose more efficient fixes
+  for future bug reports.
+
+Description
+
+  The stream is composed of a series of instructions, operands, and data. The
+  instructions consist in a few bits representing an opcode, and bits forming
+  the operands for the instruction, whose size and position depend on the
+  opcode and on the number of literals copied by previous instruction. The
+  operands are used to indicate :
+
+    - a distance when copying data from the dictionary (past output buffer)
+    - a length (number of bytes to copy from dictionary)
+    - the number of literals to copy, which is retained in variable "state"
+      as a piece of information for next instructions.
+
+  Optionally depending on the opcode and operands, extra data may follow. These
+  extra data can be a complement for the operand (eg: a length or a distance
+  encoded on larger values), or a literal to be copied to the output buffer.
+
+  The first byte of the block follows a different encoding from other bytes, it
+  seems to be optimized for literal use only, since there is no dictionary yet
+  prior to that byte.
+
+  Lengths are always encoded on a variable size starting with a small number
+  of bits in the operand. If the number of bits isn't enough to represent the
+  length, up to 255 may be added in increments by consuming more bytes with a
+  rate of at most 255 per extra byte (thus the compression ratio cannot exceed
+  around 255:1). The variable length encoding using #bits is always the same :
+
+       length = byte & ((1 << #bits) - 1)
+       if (!length) {
+               length = ((1 << #bits) - 1)
+               length += 255*(number of zero bytes)
+               length += first-non-zero-byte
+       }
+       length += constant (generally 2 or 3)
+
+  For references to the dictionary, distances are relative to the output
+  pointer. Distances are encoded using very few bits belonging to certain
+  ranges, resulting in multiple copy instructions using different encodings.
+  Certain encodings involve one extra byte, others involve two extra bytes
+  forming a little-endian 16-bit quantity (marked LE16 below).
+
+  After any instruction except the large literal copy, 0, 1, 2 or 3 literals
+  are copied before starting the next instruction. The number of literals that
+  were copied may change the meaning and behaviour of the next instruction. In
+  practice, only one instruction needs to know whether 0, less than 4, or more
+  literals were copied. This is the information stored in the <state> variable
+  in this implementation. This number of immediate literals to be copied is
+  generally encoded in the last two bits of the instruction but may also be
+  taken from the last two bits of an extra operand (eg: distance).
+
+  End of stream is declared when a block copy of distance 0 is seen. Only one
+  instruction may encode this distance (0001HLLL), it takes one LE16 operand
+  for the distance, thus requiring 3 bytes.
+
+  IMPORTANT NOTE : in the code some length checks are missing because certain
+  instructions are called under the assumption that a certain number of bytes
+  follow because it has already been garanteed before parsing the instructions.
+  They just have to "refill" this credit if they consume extra bytes. This is
+  an implementation design choice independant on the algorithm or encoding.
+
+Byte sequences
+
+  First byte encoding :
+
+      0..17   : follow regular instruction encoding, see below. It is worth
+                noting that codes 16 and 17 will represent a block copy from
+                the dictionary which is empty, and that they will always be
+                invalid at this place.
+
+      18..21  : copy 0..3 literals
+                state = (byte - 17) = 0..3  [ copy <state> literals ]
+                skip byte
+
+      22..255 : copy literal string
+                length = (byte - 17) = 4..238
+                state = 4 [ don't copy extra literals ]
+                skip byte
+
+  Instruction encoding :
+
+      0 0 0 0 X X X X  (0..15)
+        Depends on the number of literals copied by the last instruction.
+        If last instruction did not copy any literal (state == 0), this
+        encoding will be a copy of 4 or more literal, and must be interpreted
+        like this :
+
+           0 0 0 0 L L L L  (0..15)  : copy long literal string
+           length = 3 + (L ?: 15 + (zero_bytes * 255) + non_zero_byte)
+           state = 4  (no extra literals are copied)
+
+        If last instruction used to copy between 1 to 3 literals (encoded in
+        the instruction's opcode or distance), the instruction is a copy of a
+        2-byte block from the dictionary within a 1kB distance. It is worth
+        noting that this instruction provides little savings since it uses 2
+        bytes to encode a copy of 2 other bytes but it encodes the number of
+        following literals for free. It must be interpreted like this :
+
+           0 0 0 0 D D S S  (0..15)  : copy 2 bytes from <= 1kB distance
+           length = 2
+           state = S (copy S literals after this block)
+         Always followed by exactly one byte : H H H H H H H H
+           distance = (H << 2) + D + 1
+
+        If last instruction used to copy 4 or more literals (as detected by
+        state == 4), the instruction becomes a copy of a 3-byte block from the
+        dictionary from a 2..3kB distance, and must be interpreted like this :
+
+           0 0 0 0 D D S S  (0..15)  : copy 3 bytes from 2..3 kB distance
+           length = 3
+           state = S (copy S literals after this block)
+         Always followed by exactly one byte : H H H H H H H H
+           distance = (H << 2) + D + 2049
+
+      0 0 0 1 H L L L  (16..31)
+           Copy of a block within 16..48kB distance (preferably less than 10B)
+           length = 2 + (L ?: 7 + (zero_bytes * 255) + non_zero_byte)
+        Always followed by exactly one LE16 :  D D D D D D D D : D D D D D D S S
+           distance = 16384 + (H << 14) + D
+           state = S (copy S literals after this block)
+           End of stream is reached if distance == 16384
+
+      0 0 1 L L L L L  (32..63)
+           Copy of small block within 16kB distance (preferably less than 34B)
+           length = 2 + (L ?: 31 + (zero_bytes * 255) + non_zero_byte)
+        Always followed by exactly one LE16 :  D D D D D D D D : D D D D D D S S
+           distance = D + 1
+           state = S (copy S literals after this block)
+
+      0 1 L D D D S S  (64..127)
+           Copy 3-4 bytes from block within 2kB distance
+           state = S (copy S literals after this block)
+           length = 3 + L
+         Always followed by exactly one byte : H H H H H H H H
+           distance = (H << 3) + D + 1
+
+      1 L L D D D S S  (128..255)
+           Copy 5-8 bytes from block within 2kB distance
+           state = S (copy S literals after this block)
+           length = 5 + L
+         Always followed by exactly one byte : H H H H H H H H
+           distance = (H << 3) + D + 1
+
+Authors
+
+  This document was written by Willy Tarreau <w@1wt.eu> on 2014/07/19 during an
+  analysis of the decompression code available in Linux 3.16-rc5. The code is
+  tricky, it is possible that this document contains mistakes or that a few
+  corner cases were overlooked. In any case, please report any doubt, fix, or
+  proposed updates to the author(s) so that the document can be updated.
--- a/Documentation/virtual/kvm/mmu.txt
+++ b/Documentation/virtual/kvm/mmu.txt
@ -425,6 +425,20 @@ fault through the slow path.
 Since only 19 bits are used to store generation-number on mmio spte, all
 pages are zapped when there is an overflow.

+Unfortunately, a single memory access might access kvm_memslots(kvm) multiple
+times, the last one happening when the generation number is retrieved and
+stored into the MMIO spte.  Thus, the MMIO spte might be created based on
+out-of-date information, but with an up-to-date generation number.
+
+To avoid this, the generation number is incremented again after synchronize_srcu
+returns; thus, the low bit of kvm_memslots(kvm)->generation is only 1 during a
+memslot update, while some SRCU readers might be using the old copy.  We do not
+want to use an MMIO sptes created with an odd generation number, and we can do
+this without losing a bit in the MMIO spte.  The low bit of the generation
+is not stored in MMIO spte, and presumed zero when it is extracted out of the
+spte.  If KVM is unlucky and creates an MMIO spte while the low bit is 1,
+the next access to the spte will always be a cache miss.
+

 Further reading
 ===============
--- a/2
+++ b/2
@ -1,6 +1,6 @@
 VERSION = 3
 PATCHLEVEL = 17
-SUBLEVEL = 0
+SUBLEVEL = 2
 EXTRAVERSION =
 NAME = Shuffling Zombie Juror

--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@ -4,6 +4,7 @@ generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += exec.h
 generic-y += hash.h
+generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += scatterlist.h
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@ -18,6 +18,7 @@ generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kmap_types.h
 generic-y += kvm_para.h
 generic-y += local.h
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@ -144,8 +144,8 @@ dtb-$(CONFIG_MACH_KIRKWOOD) += kirkwood-b3.dtb \
 	kirkwood-openrd-client.dtb \
 	kirkwood-openrd-ultimate.dtb \
 	kirkwood-rd88f6192.dtb \
-	kirkwood-rd88f6281-a0.dtb \
-	kirkwood-rd88f6281-a1.dtb \
+	kirkwood-rd88f6281-z0.dtb \
+	kirkwood-rd88f6281-a.dtb \
 	kirkwood-rs212.dtb \
 	kirkwood-rs409.dtb \
 	kirkwood-rs411.dtb \
--- a/arch/arm/boot/dts/armada-370-netgear-rn102.dts
+++ b/arch/arm/boot/dts/armada-370-netgear-rn102.dts
@ -143,6 +143,10 @@
 				marvell,nand-enable-arbiter;
 				nand-on-flash-bbt;

+				/* Use Hardware BCH ECC */
+				nand-ecc-strength = <4>;
+				nand-ecc-step-size = <512>;
+
 				partition@0 {
 					label = "u-boot";
 					reg = <0x0000000 0x180000>;  /* 1.5MB */
--- a/arch/arm/boot/dts/armada-370-netgear-rn104.dts
+++ b/arch/arm/boot/dts/armada-370-netgear-rn104.dts
@ -145,6 +145,10 @@
 				marvell,nand-enable-arbiter;
 				nand-on-flash-bbt;

+				/* Use Hardware BCH ECC */
+				nand-ecc-strength = <4>;
+				nand-ecc-step-size = <512>;
+
 				partition@0 {
 					label = "u-boot";
 					reg = <0x0000000 0x180000>;  /* 1.5MB */
--- a/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts
+++ b/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts
@ -223,6 +223,10 @@
 				marvell,nand-enable-arbiter;
 				nand-on-flash-bbt;

+				/* Use Hardware BCH ECC */
+				nand-ecc-strength = <4>;
+				nand-ecc-step-size = <512>;
+
 				partition@0 {
 					label = "u-boot";
 					reg = <0x0000000 0x180000>;  /* 1.5MB */
--- a/arch/arm/boot/dts/at91sam9263.dtsi
+++ b/arch/arm/boot/dts/at91sam9263.dtsi
@ -834,6 +834,7 @@
 				compatible = "atmel,hsmci";
 				reg = <0xfff80000 0x600>;
 				interrupts = <10 IRQ_TYPE_LEVEL_HIGH 0>;
+				pinctrl-names = "default";
 				#address-cells = <1>;
 				#size-cells = <0>;
 				clocks = <&mci0_clk>;
@ -845,6 +846,7 @@
 				compatible = "atmel,hsmci";
 				reg = <0xfff84000 0x600>;
 				interrupts = <11 IRQ_TYPE_LEVEL_HIGH 0>;
+				pinctrl-names = "default";
 				#address-cells = <1>;
 				#size-cells = <0>;
 				clocks = <&mci1_clk>;
--- a/arch/arm/boot/dts/imx28-evk.dts
+++ b/arch/arm/boot/dts/imx28-evk.dts
@ -193,7 +193,6 @@
 			i2c0: i2c@80058000 {
 				pinctrl-names = "default";
 				pinctrl-0 = <&i2c0_pins_a>;
-				clock-frequency = <400000>;
 				status = "okay";

 				sgtl5000: codec@0a {
--- a/arch/arm/boot/dts/kirkwood-mv88f6281gtw-ge.dts
+++ b/arch/arm/boot/dts/kirkwood-mv88f6281gtw-ge.dts
@ -123,11 +123,11 @@

 	dsa@0 {
 		compatible = "marvell,dsa";
-		#address-cells = <2>;
+		#address-cells = <1>;
 		#size-cells = <0>;

-		dsa,ethernet = <&eth0>;
-		dsa,mii-bus = <&ethphy0>;
+		dsa,ethernet = <&eth0port>;
+		dsa,mii-bus = <&mdio>;

 		switch@0 {
 			#address-cells = <1>;
@ -169,17 +169,13 @@

 &mdio {
 	status = "okay";
-
-	ethphy0: ethernet-phy@ff {
-		reg = <0xff>; 	/* No phy attached */
-		speed = <1000>;
-		duplex = <1>;
-	};
 };

 &eth0 {
 	status = "okay";
+
 	ethernet0-port@0 {
-		phy-handle = <&ethphy0>;
+		speed = <1000>;
+		duplex = <1>;
 	};
 };
--- a/arch/arm/boot/dts/kirkwood-rd88f6281-a.dts
+++ b/arch/arm/boot/dts/kirkwood-rd88f6281-a.dts
@ -0,0 +1,43 @@
+/*
+ * Marvell RD88F6181 A Board descrition
+ *
+ * Andrew Lunn <andrew@lunn.ch>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ *
+ * This file contains the definitions for the board with the A0 or
+ * higher stepping of the SoC. The ethernet switch does not have a
+ * "wan" port.
+ */
+
+/dts-v1/;
+#include "kirkwood-rd88f6281.dtsi"
+
+/ {
+	model = "Marvell RD88f6281 Reference design, with A0 or higher SoC";
+	compatible = "marvell,rd88f6281-a", "marvell,rd88f6281","marvell,kirkwood-88f6281", "marvell,kirkwood";
+
+	dsa@0 {
+		switch@0 {
+			reg = <10 0>;	 /* MDIO address 10, switch 0 in tree */
+		};
+	};
+};
+
+&mdio {
+	status = "okay";
+
+	ethphy1: ethernet-phy@11 {
+		 reg = <11>;
+	};
+};
+
+&eth1 {
+	status = "okay";
+
+	ethernet1-port@0 {
+		 phy-handle = <&ethphy1>;
+	};
+};
--- a/arch/arm/boot/dts/kirkwood-rd88f6281-a0.dts
+++ b/arch/arm/boot/dts/kirkwood-rd88f6281-a0.dts
@ -1,26 +0,0 @@
-/*
- * Marvell RD88F6181 A0 Board descrition
- *
- * Andrew Lunn <andrew@lunn.ch>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2.  This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- *
- * This file contains the definitions for the board with the A0 variant of
- * the SoC. The ethernet switch does not have a "wan" port.
- */
-
-/dts-v1/;
-#include "kirkwood-rd88f6281.dtsi"
-
-/ {
-	model = "Marvell RD88f6281 Reference design, with A0 SoC";
-	compatible = "marvell,rd88f6281-a0", "marvell,rd88f6281","marvell,kirkwood-88f6281", "marvell,kirkwood";
-
-	dsa@0 {
-		switch@0 {
-			reg = <10 0>;    /* MDIO address 10, switch 0 in tree */
-		};
-	};
-};
--- a/arch/arm/boot/dts/kirkwood-rd88f6281-z0.dts
+++ b/arch/arm/boot/dts/kirkwood-rd88f6281-z0.dts
@ -1,5 +1,5 @@
 /*
- * Marvell RD88F6181 A1 Board descrition
+ * Marvell RD88F6181 Z0 stepping descrition
 *
 * Andrew Lunn <andrew@lunn.ch>
 *
@ -7,17 +7,17 @@
 * License version 2.  This program is licensed "as is" without any
 * warranty of any kind, whether express or implied.
 *
- * This file contains the definitions for the board with the A1 variant of
- * the SoC. The ethernet switch has a "wan" port.
- */
+ * This file contains the definitions for the board using the Z0
+ * stepping of the SoC. The ethernet switch has a "wan" port.
+*/

 /dts-v1/;

 #include "kirkwood-rd88f6281.dtsi"

 / {
-	model = "Marvell RD88f6281 Reference design, with A1 SoC";
-	compatible = "marvell,rd88f6281-a1", "marvell,rd88f6281","marvell,kirkwood-88f6281", "marvell,kirkwood";
+	model = "Marvell RD88f6281 Reference design, with Z0 SoC";
+	compatible = "marvell,rd88f6281-z0", "marvell,rd88f6281","marvell,kirkwood-88f6281", "marvell,kirkwood";

 	dsa@0 {
 		switch@0 {
@ -28,4 +28,8 @@
 			};
 		};
 	};
-};
+};
+
+&eth1 {
+      status = "disabled";
+};
--- a/arch/arm/boot/dts/kirkwood-rd88f6281.dtsi
+++ b/arch/arm/boot/dts/kirkwood-rd88f6281.dtsi
@ -37,7 +37,6 @@

 	ocp@f1000000 {
 		pinctrl: pin-controller@10000 {
-			pinctrl-0 = <&pmx_sdio_cd>;
 			pinctrl-names = "default";

 			pmx_sdio_cd: pmx-sdio-cd {
@ -69,8 +68,8 @@
 		#address-cells = <2>;
 		#size-cells = <0>;

-		dsa,ethernet = <&eth0>;
-		dsa,mii-bus = <&ethphy1>;
+		dsa,ethernet = <&eth0port>;
+		dsa,mii-bus = <&mdio>;

 		switch@0 {
 			#address-cells = <1>;
@ -119,35 +118,19 @@
 	};

 	partition@300000 {
-		label = "data";
+		label = "rootfs";
 		reg = <0x0300000 0x500000>;
 	};
 };

 &mdio {
 	status = "okay";
-
-	ethphy0: ethernet-phy@0 {
-		reg = <0>;
-	};
-
-	ethphy1: ethernet-phy@ff {
-		reg = <0xff>; /* No PHY attached */
-		speed = <1000>;
-		duple = <1>;
-	};
 };

 &eth0 {
 	status = "okay";
 	ethernet0-port@0 {
-		phy-handle = <&ethphy0>;
-	};
-};
-
-&eth1 {
-	status = "okay";
-	ethernet1-port@0 {
-		phy-handle = <&ethphy1>;
+		speed = <1000>;
+		duplex = <1>;
 	};
 };
--- a/arch/arm/boot/dts/kirkwood.dtsi
+++ b/arch/arm/boot/dts/kirkwood.dtsi
@ -309,7 +309,7 @@
 			marvell,tx-checksum-limit = <1600>;
 			status = "disabled";

-			ethernet0-port@0 {
+			eth0port: ethernet0-port@0 {
 				compatible = "marvell,kirkwood-eth-port";
 				reg = <0>;
 				interrupts = <11>;
@ -342,7 +342,7 @@
 			pinctrl-names = "default";
 			status = "disabled";

-			ethernet1-port@0 {
+			eth1port: ethernet1-port@0 {
 				compatible = "marvell,kirkwood-eth-port";
 				reg = <0>;
 				interrupts = <15>;
--- a/arch/arm/boot/dts/sama5d3_can.dtsi
+++ b/arch/arm/boot/dts/sama5d3_can.dtsi
@ -40,7 +40,7 @@
 						atmel,clk-output-range = <0 66000000>;
 					};

-					can1_clk: can0_clk {
+					can1_clk: can1_clk {
 						#clock-cells = <0>;
 						reg = <41>;
 						atmel,clk-output-range = <0 66000000>;
--- a/arch/arm/include/asm/irq_work.h
+++ b/arch/arm/include/asm/irq_work.h
@ -0,0 +1,11 @@
+#ifndef __ASM_ARM_IRQ_WORK_H
+#define __ASM_ARM_IRQ_WORK_H
+
+#include <asm/smp_plat.h>
+
+static inline bool arch_irq_work_has_interrupt(void)
+{
+	return is_smp();
+}
+
+#endif /* _ASM_ARM_IRQ_WORK_H */
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@ -503,7 +503,7 @@ void arch_send_call_function_single_ipi(int cpu)
 #ifdef CONFIG_IRQ_WORK
 void arch_irq_work_raise(void)
 {
-	if (is_smp())
+	if (arch_irq_work_has_interrupt())
 		smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK);
 }
 #endif
--- a/arch/arm/mach-at91/clock.c
+++ b/arch/arm/mach-at91/clock.c
@ -962,6 +962,7 @@ static int __init at91_clock_reset(void)
 	}

 	at91_pmc_write(AT91_PMC_SCDR, scdr);
+	at91_pmc_write(AT91_PMC_PCDR, pcdr);
 	if (cpu_is_sama5d3())
 		at91_pmc_write(AT91_PMC_PCDR1, pcdr1);

--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@ -9,8 +9,8 @@ generic-y += current.h
 generic-y += delay.h
 generic-y += div64.h
 generic-y += dma.h
-generic-y += emergency-restart.h
 generic-y += early_ioremap.h
+generic-y += emergency-restart.h
 generic-y += errno.h
 generic-y += ftrace.h
 generic-y += hash.h
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@ -37,8 +37,8 @@ typedef s32		compat_ssize_t;
 typedef s32		compat_time_t;
 typedef s32		compat_clock_t;
 typedef s32		compat_pid_t;
-typedef u32		__compat_uid_t;
-typedef u32		__compat_gid_t;
+typedef u16		__compat_uid_t;
+typedef u16		__compat_gid_t;
 typedef u16		__compat_uid16_t;
 typedef u16		__compat_gid16_t;
 typedef u32		__compat_uid32_t;
--- a/arch/arm64/include/asm/irq_work.h
+++ b/arch/arm64/include/asm/irq_work.h
@ -0,0 +1,22 @@
+#ifndef __ASM_IRQ_WORK_H
+#define __ASM_IRQ_WORK_H
+
+#ifdef CONFIG_SMP
+
+#include <asm/smp.h>
+
+static inline bool arch_irq_work_has_interrupt(void)
+{
+	return !!__smp_cross_call;
+}
+
+#else
+
+static inline bool arch_irq_work_has_interrupt(void)
+{
+	return false;
+}
+
+#endif
+
+#endif /* __ASM_IRQ_WORK_H */
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@ -48,6 +48,8 @@ extern void smp_init_cpus(void);
 */
 extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int));

+extern void (*__smp_cross_call)(const struct cpumask *, unsigned int);
+
 /*
 * Called from the secondary holding pen, this is the secondary CPU entry point.
 */
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@ -324,7 +324,6 @@ el1_dbg:
 	mrs	x0, far_el1
 	mov	x2, sp				// struct pt_regs
 	bl	do_debug_exception
-	enable_dbg
 	kernel_exit 1
 el1_inv:
 	// TODO: add support for undefined instructions in kernel mode
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@ -470,7 +470,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	}
 }

-static void (*__smp_cross_call)(const struct cpumask *, unsigned int);
+void (*__smp_cross_call)(const struct cpumask *, unsigned int);

 void __init set_smp_cross_call(void (*fn)(const struct cpumask *, unsigned int))
 {
--- a/arch/avr32/include/asm/Kbuild
+++ b/arch/avr32/include/asm/Kbuild
@ -9,6 +9,7 @@ generic-y += exec.h
 generic-y += futex.h
 generic-y += hash.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
--- a/arch/blackfin/include/asm/Kbuild
+++ b/arch/blackfin/include/asm/Kbuild
@ -15,6 +15,7 @@ generic-y += hw_irq.h
 generic-y += ioctl.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kvm_para.h
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@ -22,6 +22,7 @@ generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += local.h
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@ -8,6 +8,7 @@ generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += exec.h
 generic-y += hash.h
+generic-y += irq_work.h
 generic-y += kvm_para.h
 generic-y += linkage.h
 generic-y += mcs_spinlock.h
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@ -3,6 +3,7 @@ generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += exec.h
 generic-y += hash.h
+generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += scatterlist.h
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@ -23,6 +23,7 @@ generic-y += ioctls.h
 generic-y += iomap.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += local.h
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@ -2,6 +2,7 @@
 generic-y += clkdev.h
 generic-y += exec.h
 generic-y += hash.h
+generic-y += irq_work.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@ -3,6 +3,7 @@ generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += exec.h
 generic-y += hash.h
+generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += module.h
 generic-y += preempt.h
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@ -11,6 +11,7 @@ generic-y += hw_irq.h
 generic-y += ioctl.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kvm_para.h
--- a/arch/m68k/mm/hwtest.c
+++ b/arch/m68k/mm/hwtest.c
@ -28,9 +28,11 @@
 int hwreg_present( volatile void *regp )
 {
    int	ret = 0;
+    unsigned long flags;
    long	save_sp, save_vbr;
    long	tmp_vectors[3];

+    local_irq_save(flags);
    __asm__ __volatile__
 	(	"movec	%/vbr,%2\n\t"
 		"movel	#Lberr1,%4@(8)\n\t"
@ -46,6 +48,7 @@ int hwreg_present( volatile void *regp )
 		: "=&d" (ret), "=&r" (save_sp), "=&r" (save_vbr)
 		: "a" (regp), "a" (tmp_vectors)
                );
+    local_irq_restore(flags);

    return( ret );
 }
@ -58,9 +61,11 @@ EXPORT_SYMBOL(hwreg_present);
 int hwreg_write( volatile void *regp, unsigned short val )
 {
 	int		ret;
+	unsigned long flags;
 	long	save_sp, save_vbr;
 	long	tmp_vectors[3];

+	local_irq_save(flags);
 	__asm__ __volatile__
 	(	"movec	%/vbr,%2\n\t"
 		"movel	#Lberr2,%4@(8)\n\t"
@ -78,6 +83,7 @@ int hwreg_write( volatile void *regp, unsigned short val )
 		: "=&d" (ret), "=&r" (save_sp), "=&r" (save_vbr)
 		: "a" (regp), "a" (tmp_vectors), "g" (val)
 	);
+	local_irq_restore(flags);

 	return( ret );
 }
--- a/arch/metag/include/asm/Kbuild
+++ b/arch/metag/include/asm/Kbuild
@ -19,6 +19,7 @@ generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kvm_para.h
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@ -5,6 +5,7 @@ generic-y += cputime.h
 generic-y += device.h
 generic-y += exec.h
 generic-y += hash.h
+generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += scatterlist.h
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@ -3,6 +3,7 @@ generic-y += cputime.h
 generic-y += current.h
 generic-y += emergency-restart.h
 generic-y += hash.h
+generic-y += irq_work.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mutex.h
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@ -4,6 +4,7 @@ generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += exec.h
 generic-y += hash.h
+generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += scatterlist.h
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@ -31,6 +31,7 @@ generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kvm_para.h
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@ -10,6 +10,7 @@ generic-y += exec.h
 generic-y += hash.h
 generic-y += hw_irq.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kvm_para.h
 generic-y += local.h
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@ -1,6 +1,7 @@

 generic-y += clkdev.h
 generic-y += hash.h
+generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += rwsem.h
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@ -584,6 +584,8 @@ static void *__eeh_pe_state_clear(void *data, void *flag)
 {
 	struct eeh_pe *pe = (struct eeh_pe *)data;
 	int state = *((int *)flag);
+	struct eeh_dev *edev, *tmp;
+	struct pci_dev *pdev;

 	/* Keep the state of permanently removed PE intact */
 	if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
@ -592,9 +594,22 @@ static void *__eeh_pe_state_clear(void *data, void *flag)

 	pe->state &= ~state;

-	/* Clear check count since last isolation */
-	if (state & EEH_PE_ISOLATED)
-		pe->check_count = 0;
+	/*
+	 * Special treatment on clearing isolated state. Clear
+	 * check count since last isolation and put all affected
+	 * devices to normal state.
+	 */
+	if (!(state & EEH_PE_ISOLATED))
+		return NULL;
+
+	pe->check_count = 0;
+	eeh_pe_for_each_dev(pe, edev, tmp) {
+		pdev = eeh_dev_to_pci_dev(edev);
+		if (!pdev)
+			continue;
+
+		pdev->error_state = pci_channel_io_normal;
+	}

 	return NULL;
 }
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@ -379,8 +379,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 		/*
 		 * numa_node_id() works after this.
 		 */
-		set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]);
-		set_cpu_numa_mem(cpu, local_memory_node(numa_cpu_lookup_table[cpu]));
+		if (cpu_present(cpu)) {
+			set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]);
+			set_cpu_numa_mem(cpu,
+				local_memory_node(numa_cpu_lookup_table[cpu]));
+		}
 	}

 	cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
@ -728,6 +731,9 @@ void start_secondary(void *unused)
 	}
 	traverse_core_siblings(cpu, true);

+	set_numa_node(numa_cpu_lookup_table[cpu]);
+	set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
+
 	smp_wmb();
 	notify_cpu_starting(cpu);
 	set_cpu_online(cpu, true);
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@ -1127,9 +1127,8 @@ void __init do_init_bootmem(void)
 	 * even before we online them, so that we can use cpu_to_{node,mem}
 	 * early in boot, cf. smp_prepare_cpus().
 	 */
-	for_each_possible_cpu(cpu) {
-		cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
-				  (void *)(unsigned long)cpu);
+	for_each_present_cpu(cpu) {
+		numa_setup_cpu((unsigned long)cpu);
 	}
 }

--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@ -329,16 +329,16 @@ struct direct_window {

 /* Dynamic DMA Window support */
 struct ddw_query_response {
-	__be32 windows_available;
-	__be32 largest_available_block;
-	__be32 page_size;
-	__be32 migration_capable;
+	u32 windows_available;
+	u32 largest_available_block;
+	u32 page_size;
+	u32 migration_capable;
 };

 struct ddw_create_response {
-	__be32 liobn;
-	__be32 addr_hi;
-	__be32 addr_lo;
+	u32 liobn;
+	u32 addr_hi;
+	u32 addr_lo;
 };

 static LIST_HEAD(direct_window_list);
@ -725,16 +725,18 @@ static void remove_ddw(struct device_node *np, bool remove_prop)
 {
 	struct dynamic_dma_window_prop *dwp;
 	struct property *win64;
-	const u32 *ddw_avail;
+	u32 ddw_avail[3];
 	u64 liobn;
-	int len, ret = 0;
+	int ret = 0;
+
+	ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
+					 &ddw_avail[0], 3);

-	ddw_avail = of_get_property(np, "ibm,ddw-applicable", &len);
 	win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
 	if (!win64)
 		return;

-	if (!ddw_avail || len < 3 * sizeof(u32) || win64->length < sizeof(*dwp))
+	if (ret || win64->length < sizeof(*dwp))
 		goto delprop;

 	dwp = win64->value;
@ -872,8 +874,9 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,

 	do {
 		/* extra outputs are LIOBN and dma-addr (hi, lo) */
-		ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create, cfg_addr,
-				BUID_HI(buid), BUID_LO(buid), page_shift, window_shift);
+		ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create,
+				cfg_addr, BUID_HI(buid), BUID_LO(buid),
+				page_shift, window_shift);
 	} while (rtas_busy_delay(ret));
 	dev_info(&dev->dev,
 		"ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
@ -910,7 +913,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	int page_shift;
 	u64 dma_addr, max_addr;
 	struct device_node *dn;
-	const u32 *uninitialized_var(ddw_avail);
+	u32 ddw_avail[3];
 	struct direct_window *window;
 	struct property *win64;
 	struct dynamic_dma_window_prop *ddwprop;
@ -942,8 +945,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	 * for the given node in that order.
 	 * the property is actually in the parent, not the PE
 	 */
-	ddw_avail = of_get_property(pdn, "ibm,ddw-applicable", &len);
-	if (!ddw_avail || len < 3 * sizeof(u32))
+	ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable",
+					 &ddw_avail[0], 3);
+	if (ret)
 		goto out_failed;

       /*
@ -966,11 +970,11 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 		dev_dbg(&dev->dev, "no free dynamic windows");
 		goto out_failed;
 	}
-	if (be32_to_cpu(query.page_size) & 4) {
+	if (query.page_size & 4) {
 		page_shift = 24; /* 16MB */
-	} else if (be32_to_cpu(query.page_size) & 2) {
+	} else if (query.page_size & 2) {
 		page_shift = 16; /* 64kB */
-	} else if (be32_to_cpu(query.page_size) & 1) {
+	} else if (query.page_size & 1) {
 		page_shift = 12; /* 4kB */
 	} else {
 		dev_dbg(&dev->dev, "no supported direct page size in mask %x",
@ -980,7 +984,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	/* verify the window * number of ptes will map the partition */
 	/* check largest block * page size > max memory hotplug addr */
 	max_addr = memory_hotplug_max();
-	if (be32_to_cpu(query.largest_available_block) < (max_addr >> page_shift)) {
+	if (query.largest_available_block < (max_addr >> page_shift)) {
 		dev_dbg(&dev->dev, "can't map partiton max 0x%llx with %u "
 			  "%llu-sized pages\n", max_addr,  query.largest_available_block,
 			  1ULL << page_shift);
@ -1006,8 +1010,9 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	if (ret != 0)
 		goto out_free_prop;

-	ddwprop->liobn = create.liobn;
-	ddwprop->dma_base = cpu_to_be64(of_read_number(&create.addr_hi, 2));
+	ddwprop->liobn = cpu_to_be32(create.liobn);
+	ddwprop->dma_base = cpu_to_be64(((u64)create.addr_hi << 32) |
+			create.addr_lo);
 	ddwprop->tce_shift = cpu_to_be32(page_shift);
 	ddwprop->window_shift = cpu_to_be32(len);

@ -1039,7 +1044,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
 	list_add(&window->list, &direct_window_list);
 	spin_unlock(&direct_window_list_lock);

-	dma_addr = of_read_number(&create.addr_hi, 2);
+	dma_addr = be64_to_cpu(ddwprop->dma_base);
 	goto out_unlock;

 out_free_window:
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@ -2,6 +2,7 @@

 generic-y += clkdev.h
 generic-y += hash.h
+generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += scatterlist.h
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@ -85,6 +85,7 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
 			return 0;
 		if (vcpu->arch.sie_block->gcr[0] & 0x2000ul)
 			return 1;
+		return 0;
 	case KVM_S390_INT_EMERGENCY:
 		if (psw_extint_disabled(vcpu))
 			return 0;
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@ -6,6 +6,7 @@ generic-y += barrier.h
 generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += hash.h
+generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += scatterlist.h
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@ -12,6 +12,7 @@ generic-y += hash.h
 generic-y += ioctl.h
 generic-y += ipcbuf.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@ -67,6 +67,7 @@ config SPARC64
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_CONTEXT_TRACKING
 	select HAVE_DEBUG_KMEMLEAK
+	select SPARSE_IRQ
 	select RTC_DRV_CMOS
 	select RTC_DRV_BQ4802
 	select RTC_DRV_SUN4V
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@ -8,6 +8,7 @@ generic-y += emergency-restart.h
 generic-y += exec.h
 generic-y += hash.h
 generic-y += irq_regs.h
+generic-y += irq_work.h
 generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
--- a/arch/sparc/include/asm/hypervisor.h
+++ b/arch/sparc/include/asm/hypervisor.h
@ -2947,6 +2947,16 @@ unsigned long sun4v_vt_set_perfreg(unsigned long reg_num,
 				   unsigned long reg_val);
 #endif

+#define	HV_FAST_T5_GET_PERFREG		0x1a8
+#define	HV_FAST_T5_SET_PERFREG		0x1a9
+
+#ifndef	__ASSEMBLY__
+unsigned long sun4v_t5_get_perfreg(unsigned long reg_num,
+				   unsigned long *reg_val);
+unsigned long sun4v_t5_set_perfreg(unsigned long reg_num,
+				   unsigned long reg_val);
+#endif
+
 /* Function numbers for HV_CORE_TRAP.  */
 #define HV_CORE_SET_VER			0x00
 #define HV_CORE_PUTCHAR			0x01
@ -2978,6 +2988,7 @@ unsigned long sun4v_vt_set_perfreg(unsigned long reg_num,
 #define HV_GRP_VF_CPU			0x0205
 #define HV_GRP_KT_CPU			0x0209
 #define HV_GRP_VT_CPU			0x020c
+#define HV_GRP_T5_CPU			0x0211
 #define HV_GRP_DIAG			0x0300

 #ifndef __ASSEMBLY__
--- a/arch/sparc/include/asm/irq_64.h
+++ b/arch/sparc/include/asm/irq_64.h
@ -37,7 +37,7 @@
 *
 * ino_bucket->irq allocation is made during {sun4v_,}build_irq().
 */
-#define NR_IRQS    255
+#define NR_IRQS		(2048)

 void irq_install_pre_handler(int irq,
 			     void (*func)(unsigned int, void *, void *),
@ -57,11 +57,8 @@ unsigned int sun4u_build_msi(u32 portid, unsigned int *irq_p,
 			     unsigned long iclr_base);
 void sun4u_destroy_msi(unsigned int irq);

-unsigned char irq_alloc(unsigned int dev_handle,
-			unsigned int dev_ino);
-#ifdef CONFIG_PCI_MSI
+unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino);
 void irq_free(unsigned int irq);
-#endif

 void __init init_IRQ(void);
 void fixup_irqs(void);
--- a/arch/sparc/include/asm/ldc.h
+++ b/arch/sparc/include/asm/ldc.h
@ -53,13 +53,14 @@ struct ldc_channel;
 /* Allocate state for a channel.  */
 struct ldc_channel *ldc_alloc(unsigned long id,
 			      const struct ldc_channel_config *cfgp,
-			      void *event_arg);
+			      void *event_arg,
+			      const char *name);

 /* Shut down and free state for a channel.  */
 void ldc_free(struct ldc_channel *lp);

 /* Register TX and RX queues of the link with the hypervisor.  */
-int ldc_bind(struct ldc_channel *lp, const char *name);
+int ldc_bind(struct ldc_channel *lp);

 /* For non-RAW protocols we need to complete a handshake before
 * communication can proceed.  ldc_connect() does that, if the
--- a/arch/sparc/include/asm/oplib_64.h
+++ b/arch/sparc/include/asm/oplib_64.h
@ -62,7 +62,8 @@ struct linux_mem_p1275 {
 /* You must call prom_init() before using any of the library services,
 * preferably as early as possible.  Pass it the romvec pointer.
 */
-void prom_init(void *cif_handler, void *cif_stack);
+void prom_init(void *cif_handler);
+void prom_init_report(void);

 /* Boot argument acquisition, returns the boot command line string. */
 char *prom_getbootargs(void);
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@ -57,18 +57,21 @@ void copy_user_page(void *to, void *from, unsigned long vaddr, struct page *topa
 typedef struct { unsigned long pte; } pte_t;
 typedef struct { unsigned long iopte; } iopte_t;
 typedef struct { unsigned long pmd; } pmd_t;
+typedef struct { unsigned long pud; } pud_t;
 typedef struct { unsigned long pgd; } pgd_t;
 typedef struct { unsigned long pgprot; } pgprot_t;

 #define pte_val(x)	((x).pte)
 #define iopte_val(x)	((x).iopte)
 #define pmd_val(x)      ((x).pmd)
+#define pud_val(x)      ((x).pud)
 #define pgd_val(x)	((x).pgd)
 #define pgprot_val(x)	((x).pgprot)

 #define __pte(x)	((pte_t) { (x) } )
 #define __iopte(x)	((iopte_t) { (x) } )
 #define __pmd(x)        ((pmd_t) { (x) } )
+#define __pud(x)        ((pud_t) { (x) } )
 #define __pgd(x)	((pgd_t) { (x) } )
 #define __pgprot(x)	((pgprot_t) { (x) } )

@ -77,18 +80,21 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 typedef unsigned long pte_t;
 typedef unsigned long iopte_t;
 typedef unsigned long pmd_t;
+typedef unsigned long pud_t;
 typedef unsigned long pgd_t;
 typedef unsigned long pgprot_t;

 #define pte_val(x)	(x)
 #define iopte_val(x)	(x)
 #define pmd_val(x)      (x)
+#define pud_val(x)      (x)
 #define pgd_val(x)	(x)
 #define pgprot_val(x)	(x)

 #define __pte(x)	(x)
 #define __iopte(x)	(x)
 #define __pmd(x)        (x)
+#define __pud(x)        (x)
 #define __pgd(x)	(x)
 #define __pgprot(x)	(x)

@ -96,21 +102,14 @@ typedef unsigned long pgprot_t;

 typedef pte_t *pgtable_t;

-/* These two values define the virtual address space range in which we
- * must forbid 64-bit user processes from making mappings.  It used to
- * represent precisely the virtual address space hole present in most
- * early sparc64 chips including UltraSPARC-I.  But now it also is
- * further constrained by the limits of our page tables, which is
- * 43-bits of virtual address.
- */
-#define SPARC64_VA_HOLE_TOP	_AC(0xfffffc0000000000,UL)
-#define SPARC64_VA_HOLE_BOTTOM	_AC(0x0000040000000000,UL)
+extern unsigned long sparc64_va_hole_top;
+extern unsigned long sparc64_va_hole_bottom;

 /* The next two defines specify the actual exclusion region we
 * enforce, wherein we use a 4GB red zone on each side of the VA hole.
 */
-#define VA_EXCLUDE_START (SPARC64_VA_HOLE_BOTTOM - (1UL << 32UL))
-#define VA_EXCLUDE_END   (SPARC64_VA_HOLE_TOP + (1UL << 32UL))
+#define VA_EXCLUDE_START (sparc64_va_hole_bottom - (1UL << 32UL))
+#define VA_EXCLUDE_END   (sparc64_va_hole_top + (1UL << 32UL))

 #define TASK_UNMAPPED_BASE	(test_thread_flag(TIF_32BIT) ? \
 				 _AC(0x0000000070000000,UL) : \
@ -118,20 +117,16 @@ typedef pte_t *pgtable_t;

 #include <asm-generic/memory_model.h>

-#define PAGE_OFFSET_BY_BITS(X)	(-(_AC(1,UL) << (X)))
 extern unsigned long PAGE_OFFSET;

 #endif /* !(__ASSEMBLY__) */

-/* The maximum number of physical memory address bits we support, this
- * is used to size various tables used to manage kernel TLB misses and
- * also the sparsemem code.
+/* The maximum number of physical memory address bits we support.  The
+ * largest value we can support is whatever "KPGD_SHIFT + KPTE_BITS"
+ * evaluates to.
 */
-#define MAX_PHYS_ADDRESS_BITS	47
+#define MAX_PHYS_ADDRESS_BITS	53

-/* These two shift counts are used when indexing sparc64_valid_addr_bitmap
- * and kpte_linear_bitmap.
- */
 #define ILOG2_4MB		22
 #define ILOG2_256MB		28

--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@ -15,6 +15,13 @@

 extern struct kmem_cache *pgtable_cache;

+static inline void __pgd_populate(pgd_t *pgd, pud_t *pud)
+{
+	pgd_set(pgd, pud);
+}
+
+#define pgd_populate(MM, PGD, PUD)	__pgd_populate(PGD, PUD)
+
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	return kmem_cache_alloc(pgtable_cache, GFP_KERNEL);
@ -25,7 +32,23 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	kmem_cache_free(pgtable_cache, pgd);
 }

-#define pud_populate(MM, PUD, PMD)	pud_set(PUD, PMD)
+static inline void __pud_populate(pud_t *pud, pmd_t *pmd)
+{
+	pud_set(pud, pmd);
+}
+
+#define pud_populate(MM, PUD, PMD)	__pud_populate(PUD, PMD)
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+	return kmem_cache_alloc(pgtable_cache,
+				GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+	kmem_cache_free(pgtable_cache, pud);
+}

 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
@ -91,4 +114,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pte_t *pte,
 #define __pmd_free_tlb(tlb, pmd, addr)		      \
 	pgtable_free_tlb(tlb, pmd, false)

+#define __pud_free_tlb(tlb, pud, addr)		      \
+	pgtable_free_tlb(tlb, pud, false)
+
 #endif /* _SPARC64_PGALLOC_H */
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@ -20,8 +20,6 @@
 #include <asm/page.h>
 #include <asm/processor.h>

-#include <asm-generic/pgtable-nopud.h>
-
 /* The kernel image occupies 0x4000000 to 0x6000000 (4MB --> 96MB).
 * The page copy blockops can use 0x6000000 to 0x8000000.
 * The 8K TSB is mapped in the 0x8000000 to 0x8400000 range.
@ -42,10 +40,7 @@
 #define LOW_OBP_ADDRESS		_AC(0x00000000f0000000,UL)
 #define HI_OBP_ADDRESS		_AC(0x0000000100000000,UL)
 #define VMALLOC_START		_AC(0x0000000100000000,UL)
-#define VMALLOC_END		_AC(0x0000010000000000,UL)
-#define VMEMMAP_BASE		_AC(0x0000010000000000,UL)
-
-#define vmemmap			((struct page *)VMEMMAP_BASE)
+#define VMEMMAP_BASE		VMALLOC_END

 /* PMD_SHIFT determines the size of the area a second-level page
 * table can map
@ -55,13 +50,25 @@
 #define PMD_MASK	(~(PMD_SIZE-1))
 #define PMD_BITS	(PAGE_SHIFT - 3)

-/* PGDIR_SHIFT determines what a third-level page table entry can map */
-#define PGDIR_SHIFT	(PAGE_SHIFT + (PAGE_SHIFT-3) + PMD_BITS)
+/* PUD_SHIFT determines the size of the area a third-level page
+ * table can map
+ */
+#define PUD_SHIFT	(PMD_SHIFT + PMD_BITS)
+#define PUD_SIZE	(_AC(1,UL) << PUD_SHIFT)
+#define PUD_MASK	(~(PUD_SIZE-1))
+#define PUD_BITS	(PAGE_SHIFT - 3)
+
+/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
+#define PGDIR_SHIFT	(PUD_SHIFT + PUD_BITS)
 #define PGDIR_SIZE	(_AC(1,UL) << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 #define PGDIR_BITS	(PAGE_SHIFT - 3)

-#if (PGDIR_SHIFT + PGDIR_BITS) != 43
+#if (MAX_PHYS_ADDRESS_BITS > PGDIR_SHIFT + PGDIR_BITS)
+#error MAX_PHYS_ADDRESS_BITS exceeds what kernel page tables can support
+#endif
+
+#if (PGDIR_SHIFT + PGDIR_BITS) != 53
 #error Page table parameters do not cover virtual address space properly.
 #endif

@ -71,28 +78,18 @@

 #ifndef __ASSEMBLY__

+extern unsigned long VMALLOC_END;
+
+#define vmemmap			((struct page *)VMEMMAP_BASE)
+
 #include <linux/sched.h>

-extern unsigned long sparc64_valid_addr_bitmap[];
-
-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
-static inline bool __kern_addr_valid(unsigned long paddr)
-{
-	if ((paddr >> MAX_PHYS_ADDRESS_BITS) != 0UL)
-		return false;
-	return test_bit(paddr >> ILOG2_4MB, sparc64_valid_addr_bitmap);
-}
-
-static inline bool kern_addr_valid(unsigned long addr)
-{
-	unsigned long paddr = __pa(addr);
-
-	return __kern_addr_valid(paddr);
-}
+bool kern_addr_valid(unsigned long addr);

 /* Entries per page directory level. */
 #define PTRS_PER_PTE	(1UL << (PAGE_SHIFT-3))
 #define PTRS_PER_PMD	(1UL << PMD_BITS)
+#define PTRS_PER_PUD	(1UL << PUD_BITS)
 #define PTRS_PER_PGD	(1UL << PGDIR_BITS)

 /* Kernel has a separate 44bit address space. */
@ -101,6 +98,9 @@ static inline bool kern_addr_valid(unsigned long addr)
 #define pmd_ERROR(e)							\
 	pr_err("%s:%d: bad pmd %p(%016lx) seen at (%pS)\n",		\
 	       __FILE__, __LINE__, &(e), pmd_val(e), __builtin_return_address(0))
+#define pud_ERROR(e)							\
+	pr_err("%s:%d: bad pud %p(%016lx) seen at (%pS)\n",		\
+	       __FILE__, __LINE__, &(e), pud_val(e), __builtin_return_address(0))
 #define pgd_ERROR(e)							\
 	pr_err("%s:%d: bad pgd %p(%016lx) seen at (%pS)\n",		\
 	       __FILE__, __LINE__, &(e), pgd_val(e), __builtin_return_address(0))
@ -112,6 +112,7 @@ static inline bool kern_addr_valid(unsigned long addr)
 #define _PAGE_R	  	  _AC(0x8000000000000000,UL) /* Keep ref bit uptodate*/
 #define _PAGE_SPECIAL     _AC(0x0200000000000000,UL) /* Special page         */
 #define _PAGE_PMD_HUGE    _AC(0x0100000000000000,UL) /* Huge page            */
+#define _PAGE_PUD_HUGE    _PAGE_PMD_HUGE

 /* Advertise support for _PAGE_SPECIAL */
 #define __HAVE_ARCH_PTE_SPECIAL
@ -658,6 +659,13 @@ static inline unsigned long pmd_large(pmd_t pmd)
 	return pte_val(pte) & _PAGE_PMD_HUGE;
 }

+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+	pte_t pte = __pte(pmd_val(pmd));
+
+	return pte_pfn(pte);
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline unsigned long pmd_young(pmd_t pmd)
 {
@ -673,13 +681,6 @@ static inline unsigned long pmd_write(pmd_t pmd)
 	return pte_write(pte);
 }

-static inline unsigned long pmd_pfn(pmd_t pmd)
-{
-	pte_t pte = __pte(pmd_val(pmd));
-
-	return pte_pfn(pte);
-}
-
 static inline unsigned long pmd_trans_huge(pmd_t pmd)
 {
 	pte_t pte = __pte(pmd_val(pmd));
@ -771,13 +772,15 @@ static inline int pmd_present(pmd_t pmd)
 * the top bits outside of the range of any physical address size we
 * support are clear as well.  We also validate the physical itself.
 */
-#define pmd_bad(pmd)			((pmd_val(pmd) & ~PAGE_MASK) || \
-					 !__kern_addr_valid(pmd_val(pmd)))
+#define pmd_bad(pmd)			(pmd_val(pmd) & ~PAGE_MASK)

 #define pud_none(pud)			(!pud_val(pud))

-#define pud_bad(pud)			((pud_val(pud) & ~PAGE_MASK) || \
-					 !__kern_addr_valid(pud_val(pud)))
+#define pud_bad(pud)			(pud_val(pud) & ~PAGE_MASK)
+
+#define pgd_none(pgd)			(!pgd_val(pgd))
+
+#define pgd_bad(pgd)			(pgd_val(pgd) & ~PAGE_MASK)

 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void set_pmd_at(struct mm_struct *mm, unsigned long addr,
@ -815,10 +818,31 @@ static inline unsigned long __pmd_page(pmd_t pmd)
 #define pmd_clear(pmdp)			(pmd_val(*(pmdp)) = 0UL)
 #define pud_present(pud)		(pud_val(pud) != 0U)
 #define pud_clear(pudp)			(pud_val(*(pudp)) = 0UL)
+#define pgd_page_vaddr(pgd)		\
+	((unsigned long) __va(pgd_val(pgd)))
+#define pgd_present(pgd)		(pgd_val(pgd) != 0U)
+#define pgd_clear(pgdp)			(pgd_val(*(pgd)) = 0UL)
+
+static inline unsigned long pud_large(pud_t pud)
+{
+	pte_t pte = __pte(pud_val(pud));
+
+	return pte_val(pte) & _PAGE_PMD_HUGE;
+}
+
+static inline unsigned long pud_pfn(pud_t pud)
+{
+	pte_t pte = __pte(pud_val(pud));
+
+	return pte_pfn(pte);
+}

 /* Same in both SUN4V and SUN4U.  */
 #define pte_none(pte) 			(!pte_val(pte))

+#define pgd_set(pgdp, pudp)	\
+	(pgd_val(*(pgdp)) = (__pa((unsigned long) (pudp))))
+
 /* to find an entry in a page-table-directory. */
 #define pgd_index(address)	(((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
 #define pgd_offset(mm, address)	((mm)->pgd + pgd_index(address))
@ -826,6 +850,11 @@ static inline unsigned long __pmd_page(pmd_t pmd)
 /* to find an entry in a kernel page-table-directory */
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)

+/* Find an entry in the third-level page table.. */
+#define pud_index(address)	(((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+#define pud_offset(pgdp, address)	\
+	((pud_t *) pgd_page_vaddr(*(pgdp)) + pud_index(address))
+
 /* Find an entry in the second-level page table.. */
 #define pmd_offset(pudp, address)	\
 	((pmd_t *) pud_page_vaddr(*(pudp)) + \
@ -898,7 +927,6 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
 #endif

 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
-extern pmd_t swapper_low_pmd_dir[PTRS_PER_PMD];

 void paging_init(void);
 unsigned long find_ecache_flush_span(unsigned long size);
--- a/arch/sparc/include/asm/setup.h
+++ b/arch/sparc/include/asm/setup.h
@ -48,6 +48,8 @@ unsigned long safe_compute_effective_address(struct pt_regs *, unsigned int);
 #endif

 #ifdef CONFIG_SPARC64
+void __init start_early_boot(void);
+
 /* unaligned_64.c */
 int handle_ldf_stq(u32 insn, struct pt_regs *regs);
 void handle_ld_nf(u32 insn, struct pt_regs *regs);
--- a/arch/sparc/include/asm/spitfire.h
+++ b/arch/sparc/include/asm/spitfire.h
@ -45,6 +45,8 @@
 #define SUN4V_CHIP_NIAGARA3	0x03
 #define SUN4V_CHIP_NIAGARA4	0x04
 #define SUN4V_CHIP_NIAGARA5	0x05
+#define SUN4V_CHIP_SPARC_M6	0x06
+#define SUN4V_CHIP_SPARC_M7	0x07
 #define SUN4V_CHIP_SPARC64X	0x8a
 #define SUN4V_CHIP_UNKNOWN	0xff

--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@ -63,7 +63,8 @@ struct thread_info {
 	struct pt_regs		*kern_una_regs;
 	unsigned int		kern_una_insn;

-	unsigned long		fpregs[0] __attribute__ ((aligned(64)));
+	unsigned long		fpregs[(7 * 256) / sizeof(unsigned long)]
+		__attribute__ ((aligned(64)));
 };

 #endif /* !(__ASSEMBLY__) */
@ -102,6 +103,7 @@ struct thread_info {
 #define FAULT_CODE_ITLB		0x04	/* Miss happened in I-TLB	   */
 #define FAULT_CODE_WINFIXUP	0x08	/* Miss happened during spill/fill */
 #define FAULT_CODE_BLKCOMMIT	0x10	/* Use blk-commit ASI in copy_page */
+#define	FAULT_CODE_BAD_RA	0x20	/* Bad RA for sun4v		   */

 #if PAGE_SHIFT == 13
 #define THREAD_SIZE (2*PAGE_SIZE)
--- a/arch/sparc/include/asm/tsb.h
+++ b/arch/sparc/include/asm/tsb.h
@ -133,9 +133,24 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	sub	TSB, 0x8, TSB;   \
 	TSB_STORE(TSB, TAG);

-	/* Do a kernel page table walk.  Leaves physical PTE pointer in
-	 * REG1.  Jumps to FAIL_LABEL on early page table walk termination.
-	 * VADDR will not be clobbered, but REG2 will.
+	/* Do a kernel page table walk.  Leaves valid PTE value in
+	 * REG1.  Jumps to FAIL_LABEL on early page table walk
+	 * termination.  VADDR will not be clobbered, but REG2 will.
+	 *
+	 * There are two masks we must apply to propagate bits from
+	 * the virtual address into the PTE physical address field
+	 * when dealing with huge pages.  This is because the page
+	 * table boundaries do not match the huge page size(s) the
+	 * hardware supports.
+	 *
+	 * In these cases we propagate the bits that are below the
+	 * page table level where we saw the huge page mapping, but
+	 * are still within the relevant physical bits for the huge
+	 * page size in question.  So for PMD mappings (which fall on
+	 * bit 23, for 8MB per PMD) we must propagate bit 22 for a
+	 * 4MB huge page.  For huge PUDs (which fall on bit 33, for
+	 * 8GB per PUD), we have to accomodate 256MB and 2GB huge
+	 * pages.  So for those we propagate bits 32 to 28.
 	 */
 #define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL)	\
 	sethi		%hi(swapper_pg_dir), REG1; \
@ -145,15 +160,40 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	andn		REG2, 0x7, REG2; \
 	ldx		[REG1 + REG2], REG1; \
 	brz,pn		REG1, FAIL_LABEL; \
-	 sllx		VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
+	 sllx		VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \
 	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
 	andn		REG2, 0x7, REG2; \
 	ldxa		[REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
 	brz,pn		REG1, FAIL_LABEL; \
-	 sllx		VADDR, 64 - PMD_SHIFT, REG2; \
+	sethi		%uhi(_PAGE_PUD_HUGE), REG2; \
+	brz,pn		REG1, FAIL_LABEL; \
+	 sllx		REG2, 32, REG2; \
+	andcc		REG1, REG2, %g0; \
+	sethi		%hi(0xf8000000), REG2; \
+	bne,pt		%xcc, 697f; \
+	 sllx		REG2, 1, REG2; \
+	sllx		VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
 	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
 	andn		REG2, 0x7, REG2; \
-	add		REG1, REG2, REG1;
+	ldxa		[REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+	sethi		%uhi(_PAGE_PMD_HUGE), REG2; \
+	brz,pn		REG1, FAIL_LABEL; \
+	 sllx		REG2, 32, REG2; \
+	andcc		REG1, REG2, %g0; \
+	be,pn		%xcc, 698f; \
+	 sethi		%hi(0x400000), REG2; \
+697:	brgez,pn	REG1, FAIL_LABEL; \
+	 andn		REG1, REG2, REG1; \
+	and		VADDR, REG2, REG2; \
+	ba,pt		%xcc, 699f; \
+	 or		REG1, REG2, REG1; \
+698:	sllx		VADDR, 64 - PMD_SHIFT, REG2; \
+	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
+	andn		REG2, 0x7, REG2; \
+	ldxa		[REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+	brgez,pn	REG1, FAIL_LABEL; \
+	 nop; \
+699:

 	/* PMD has been loaded into REG1, interpret the value, seeing
 	 * if it is a HUGE PMD or a normal one.  If it is not valid
@ -197,6 +237,11 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
 	andn		REG2, 0x7, REG2; \
 	ldxa		[PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \
+	brz,pn		REG1, FAIL_LABEL; \
+	 sllx		VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \
+	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
+	andn		REG2, 0x7, REG2; \
+	ldxa		[REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
 	brz,pn		REG1, FAIL_LABEL; \
 	 sllx		VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
 	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
@ -246,8 +291,6 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	(KERNEL_TSB_SIZE_BYTES / 16)
 #define KERNEL_TSB4M_NENTRIES	4096

-#define KTSB_PHYS_SHIFT		15
-
 	/* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL
 	 * on TSB hit.  REG1, REG2, REG3, and REG4 are used as temporaries
 	 * and the found TTE will be left in REG1.  REG3 and REG4 must
@ -256,17 +299,15 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	 * VADDR and TAG will be preserved and not clobbered by this macro.
 	 */
 #define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
-661:	sethi		%hi(swapper_tsb), REG1;			\
-	or		REG1, %lo(swapper_tsb), REG1; \
+661:	sethi		%uhi(swapper_tsb), REG1; \
+	sethi		%hi(swapper_tsb), REG2; \
+	or		REG1, %ulo(swapper_tsb), REG1; \
+	or		REG2, %lo(swapper_tsb), REG2; \
 	.section	.swapper_tsb_phys_patch, "ax"; \
 	.word		661b; \
 	.previous; \
-661:	nop; \
-	.section	.tsb_ldquad_phys_patch, "ax"; \
-	.word		661b; \
-	sllx		REG1, KTSB_PHYS_SHIFT, REG1; \
-	sllx		REG1, KTSB_PHYS_SHIFT, REG1; \
-	.previous; \
+	sllx		REG1, 32, REG1; \
+	or		REG1, REG2, REG1; \
 	srlx		VADDR, PAGE_SHIFT, REG2; \
 	and		REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \
 	sllx		REG2, 4, REG2; \
@ -281,17 +322,15 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	 * we can make use of that for the index computation.
 	 */
 #define KERN_TSB4M_LOOKUP_TL1(TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
-661:	sethi		%hi(swapper_4m_tsb), REG1;	     \
-	or		REG1, %lo(swapper_4m_tsb), REG1; \
+661:	sethi		%uhi(swapper_4m_tsb), REG1; \
+	sethi		%hi(swapper_4m_tsb), REG2; \
+	or		REG1, %ulo(swapper_4m_tsb), REG1; \
+	or		REG2, %lo(swapper_4m_tsb), REG2; \
 	.section	.swapper_4m_tsb_phys_patch, "ax"; \
 	.word		661b; \
 	.previous; \
-661:	nop; \
-	.section	.tsb_ldquad_phys_patch, "ax"; \
-	.word		661b; \
-	sllx		REG1, KTSB_PHYS_SHIFT, REG1; \
-	sllx		REG1, KTSB_PHYS_SHIFT, REG1; \
-	.previous; \
+	sllx		REG1, 32, REG1; \
+	or		REG1, REG2, REG1; \
 	and		TAG, (KERNEL_TSB4M_NENTRIES - 1), REG2; \
 	sllx		REG2, 4, REG2; \
 	add		REG1, REG2, REG2; \
--- a/arch/sparc/include/asm/visasm.h
+++ b/arch/sparc/include/asm/visasm.h
@ -39,6 +39,14 @@
 297:	wr		%o5, FPRS_FEF, %fprs;		\
 298:

+#define VISEntryHalfFast(fail_label)			\
+	rd		%fprs, %o5;			\
+	andcc		%o5, FPRS_FEF, %g0;		\
+	be,pt		%icc, 297f;			\
+	 nop;						\
+	ba,a,pt		%xcc, fail_label;		\
+297:	wr		%o5, FPRS_FEF, %fprs;
+
 #define VISExitHalf					\
 	wr		%o5, 0, %fprs;

--- a/arch/sparc/kernel/cpu.c
+++ b/arch/sparc/kernel/cpu.c
@ -494,6 +494,18 @@ static void __init sun4v_cpu_probe(void)
 		sparc_pmu_type = "niagara5";
 		break;

+	case SUN4V_CHIP_SPARC_M6:
+		sparc_cpu_type = "SPARC-M6";
+		sparc_fpu_type = "SPARC-M6 integrated FPU";
+		sparc_pmu_type = "sparc-m6";
+		break;
+
+	case SUN4V_CHIP_SPARC_M7:
+		sparc_cpu_type = "SPARC-M7";
+		sparc_fpu_type = "SPARC-M7 integrated FPU";
+		sparc_pmu_type = "sparc-m7";
+		break;
+
 	case SUN4V_CHIP_SPARC64X:
 		sparc_cpu_type = "SPARC64-X";
 		sparc_fpu_type = "SPARC64-X integrated FPU";
--- a/arch/sparc/kernel/cpumap.c
+++ b/arch/sparc/kernel/cpumap.c
@ -326,6 +326,8 @@ static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index)
 	case SUN4V_CHIP_NIAGARA3:
 	case SUN4V_CHIP_NIAGARA4:
 	case SUN4V_CHIP_NIAGARA5:
+	case SUN4V_CHIP_SPARC_M6:
+	case SUN4V_CHIP_SPARC_M7:
 	case SUN4V_CHIP_SPARC64X:
 		rover_inc_table = niagara_iterate_method;
 		break;
--- a/arch/sparc/kernel/ds.c
+++ b/arch/sparc/kernel/ds.c
@ -1200,14 +1200,14 @@ static int ds_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	ds_cfg.tx_irq = vdev->tx_irq;
 	ds_cfg.rx_irq = vdev->rx_irq;

-	lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp);
+	lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp, "DS");
 	if (IS_ERR(lp)) {
 		err = PTR_ERR(lp);
 		goto out_free_ds_states;
 	}
 	dp->lp = lp;

-	err = ldc_bind(lp, "DS");
+	err = ldc_bind(lp);
 	if (err)
 		goto out_free_ldc;

--- a/arch/sparc/kernel/dtlb_prot.S
+++ b/arch/sparc/kernel/dtlb_prot.S
@ -24,11 +24,11 @@
 	mov		TLB_TAG_ACCESS, %g4		! For reload of vaddr

 /* PROT ** ICACHE line 2: More real fault processing */
+	ldxa		[%g4] ASI_DMMU, %g5		! Put tagaccess in %g5
 	bgu,pn		%xcc, winfix_trampoline		! Yes, perform winfixup
-	 ldxa		[%g4] ASI_DMMU, %g5		! Put tagaccess in %g5
-	ba,pt		%xcc, sparc64_realfault_common	! Nope, normal fault
 	 mov		FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4
-	nop
+	ba,pt		%xcc, sparc64_realfault_common	! Nope, normal fault
+	 nop
 	nop
 	nop
 	nop
--- a/arch/sparc/kernel/entry.h
+++ b/arch/sparc/kernel/entry.h
@ -65,13 +65,10 @@ struct pause_patch_entry {
 extern struct pause_patch_entry __pause_3insn_patch,
 	__pause_3insn_patch_end;

-void __init per_cpu_patch(void);
 void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *,
 			     struct sun4v_1insn_patch_entry *);
 void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *,
 			     struct sun4v_2insn_patch_entry *);
-void __init sun4v_patch(void);
-void __init boot_cpu_id_too_large(int cpu);
 extern unsigned int dcache_parity_tl1_occurred;
 extern unsigned int icache_parity_tl1_occurred;

--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@ -427,6 +427,12 @@ sun4v_chip_type:
 	cmp	%g2, '5'
 	be,pt	%xcc, 5f
 	 mov	SUN4V_CHIP_NIAGARA5, %g4
+	cmp	%g2, '6'
+	be,pt	%xcc, 5f
+	 mov	SUN4V_CHIP_SPARC_M6, %g4
+	cmp	%g2, '7'
+	be,pt	%xcc, 5f
+	 mov	SUN4V_CHIP_SPARC_M7, %g4
 	ba,pt	%xcc, 49f
 	 nop

@ -583,6 +589,12 @@ niagara_tlb_fixup:
 	be,pt	%xcc, niagara4_patch
 	 nop
 	cmp	%g1, SUN4V_CHIP_NIAGARA5
+	be,pt	%xcc, niagara4_patch
+	 nop
+	cmp	%g1, SUN4V_CHIP_SPARC_M6
+	be,pt	%xcc, niagara4_patch
+	 nop
+	cmp	%g1, SUN4V_CHIP_SPARC_M7
 	be,pt	%xcc, niagara4_patch
 	 nop

@ -660,14 +672,12 @@ tlb_fixup_done:
 	sethi	%hi(init_thread_union), %g6
 	or	%g6, %lo(init_thread_union), %g6
 	ldx	[%g6 + TI_TASK], %g4
-	mov	%sp, %l6

 	wr	%g0, ASI_P, %asi
 	mov	1, %g1
 	sllx	%g1, THREAD_SHIFT, %g1
 	sub	%g1, (STACKFRAME_SZ + STACK_BIAS), %g1
 	add	%g6, %g1, %sp
-	mov	0, %fp

 	/* Set per-cpu pointer initially to zero, this makes
 	 * the boot-cpu use the in-kernel-image per-cpu areas
@ -694,44 +704,14 @@ tlb_fixup_done:
 	 nop
 #endif

-	mov	%l6, %o1			! OpenPROM stack
 	call	prom_init
 	 mov	%l7, %o0			! OpenPROM cif handler

-	/* Initialize current_thread_info()->cpu as early as possible.
-	 * In order to do that accurately we have to patch up the get_cpuid()
-	 * assembler sequences.  And that, in turn, requires that we know
-	 * if we are on a Starfire box or not.  While we're here, patch up
-	 * the sun4v sequences as well.
+	/* To create a one-register-window buffer between the kernel's
+	 * initial stack and the last stack frame we use from the firmware,
+	 * do the rest of the boot from a C helper function.
 	 */
-	call	check_if_starfire
-	 nop
-	call	per_cpu_patch
-	 nop
-	call	sun4v_patch
-	 nop
-
-#ifdef CONFIG_SMP
-	call	hard_smp_processor_id
-	 nop
-	cmp	%o0, NR_CPUS
-	blu,pt	%xcc, 1f
-	 nop
-	call	boot_cpu_id_too_large
-	 nop
-	/* Not reached... */
-
-1:
-#else
-	mov	0, %o0
-#endif
-	sth	%o0, [%g6 + TI_CPU]
-
-	call	prom_init_report
-	 nop
-
-	/* Off we go.... */
-	call	start_kernel
+	call	start_early_boot
 	 nop
 	/* Not reached... */

--- a/arch/sparc/kernel/hvapi.c
+++ b/arch/sparc/kernel/hvapi.c
@ -46,6 +46,7 @@ static struct api_info api_table[] = {
 	{ .group = HV_GRP_VF_CPU,				},
 	{ .group = HV_GRP_KT_CPU,				},
 	{ .group = HV_GRP_VT_CPU,				},
+	{ .group = HV_GRP_T5_CPU,				},
 	{ .group = HV_GRP_DIAG,		.flags = FLAG_PRE_API	},
 };

--- a/arch/sparc/kernel/hvcalls.S
+++ b/arch/sparc/kernel/hvcalls.S
@ -821,3 +821,19 @@ ENTRY(sun4v_vt_set_perfreg)
 	retl
 	 nop
 ENDPROC(sun4v_vt_set_perfreg)
+
+ENTRY(sun4v_t5_get_perfreg)
+	mov	%o1, %o4
+	mov	HV_FAST_T5_GET_PERFREG, %o5
+	ta	HV_FAST_TRAP
+	stx	%o1, [%o4]
+	retl
+	 nop
+ENDPROC(sun4v_t5_get_perfreg)
+
+ENTRY(sun4v_t5_set_perfreg)
+	mov	HV_FAST_T5_SET_PERFREG, %o5
+	ta	HV_FAST_TRAP
+	retl
+	 nop
+ENDPROC(sun4v_t5_set_perfreg)
--- a/arch/sparc/kernel/hvtramp.S
+++ b/arch/sparc/kernel/hvtramp.S
@ -109,7 +109,6 @@ hv_cpu_startup:
 	sllx		%g5, THREAD_SHIFT, %g5
 	sub		%g5, (STACKFRAME_SZ + STACK_BIAS), %g5
 	add		%g6, %g5, %sp
-	mov		0, %fp

 	call		init_irqwork_curcpu
 	 nop
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@ -278,7 +278,8 @@ static void *sbus_alloc_coherent(struct device *dev, size_t len,
 	}

 	order = get_order(len_total);
-	if ((va = __get_free_pages(GFP_KERNEL|__GFP_COMP, order)) == 0)
+	va = __get_free_pages(gfp, order);
+	if (va == 0)
 		goto err_nopages;

 	if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL)
@ -443,7 +444,7 @@ static void *pci32_alloc_coherent(struct device *dev, size_t len,
 	}

 	order = get_order(len_total);
-	va = (void *) __get_free_pages(GFP_KERNEL, order);
+	va = (void *) __get_free_pages(gfp, order);
 	if (va == NULL) {
 		printk("pci_alloc_consistent: no %ld pages\n", len_total>>PAGE_SHIFT);
 		goto err_nopages;
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@ -47,8 +47,6 @@
 #include "cpumap.h"
 #include "kstack.h"

-#define NUM_IVECS	(IMAP_INR + 1)
-
 struct ino_bucket *ivector_table;
 unsigned long ivector_table_pa;

@ -107,55 +105,196 @@ static void bucket_set_irq(unsigned long bucket_pa, unsigned int irq)

 #define irq_work_pa(__cpu)	&(trap_block[(__cpu)].irq_worklist_pa)

-static struct {
-	unsigned int dev_handle;
-	unsigned int dev_ino;
-	unsigned int in_use;
-} irq_table[NR_IRQS];
-static DEFINE_SPINLOCK(irq_alloc_lock);
-
-unsigned char irq_alloc(unsigned int dev_handle, unsigned int dev_ino)
+static unsigned long hvirq_major __initdata;
+static int __init early_hvirq_major(char *p)
 {
-	unsigned long flags;
-	unsigned char ent;
+	int rc = kstrtoul(p, 10, &hvirq_major);

-	BUILD_BUG_ON(NR_IRQS >= 256);
+	return rc;
+}
+early_param("hvirq", early_hvirq_major);

-	spin_lock_irqsave(&irq_alloc_lock, flags);
+static int hv_irq_version;

-	for (ent = 1; ent < NR_IRQS; ent++) {
-		if (!irq_table[ent].in_use)
-			break;
-	}
-	if (ent >= NR_IRQS) {
-		printk(KERN_ERR "IRQ: Out of virtual IRQs.\n");
-		ent = 0;
-	} else {
-		irq_table[ent].dev_handle = dev_handle;
-		irq_table[ent].dev_ino = dev_ino;
-		irq_table[ent].in_use = 1;
-	}
-
-	spin_unlock_irqrestore(&irq_alloc_lock, flags);
-
-	return ent;
+/* Major version 2.0 of HV_GRP_INTR added support for the VIRQ cookie
+ * based interfaces, but:
+ *
+ * 1) Several OSs, Solaris and Linux included, use them even when only
+ *    negotiating version 1.0 (or failing to negotiate at all).  So the
+ *    hypervisor has a workaround that provides the VIRQ interfaces even
+ *    when only verion 1.0 of the API is in use.
+ *
+ * 2) Second, and more importantly, with major version 2.0 these VIRQ
+ *    interfaces only were actually hooked up for LDC interrupts, even
+ *    though the Hypervisor specification clearly stated:
+ *
+ *	The new interrupt API functions will be available to a guest
+ *	when it negotiates version 2.0 in the interrupt API group 0x2. When
+ *	a guest negotiates version 2.0, all interrupt sources will only
+ *	support using the cookie interface, and any attempt to use the
+ *	version 1.0 interrupt APIs numbered 0xa0 to 0xa6 will result in the
+ *	ENOTSUPPORTED error being returned.
+ *
+ *   with an emphasis on "all interrupt sources".
+ *
+ * To correct this, major version 3.0 was created which does actually
+ * support VIRQs for all interrupt sources (not just LDC devices).  So
+ * if we want to move completely over the cookie based VIRQs we must
+ * negotiate major version 3.0 or later of HV_GRP_INTR.
+ */
+static bool sun4v_cookie_only_virqs(void)
+{
+	if (hv_irq_version >= 3)
+		return true;
+	return false;
 }

-#ifdef CONFIG_PCI_MSI
-void irq_free(unsigned int irq)
+static void __init irq_init_hv(void)
 {
-	unsigned long flags;
+	unsigned long hv_error, major, minor = 0;

-	if (irq >= NR_IRQS)
+	if (tlb_type != hypervisor)
 		return;

-	spin_lock_irqsave(&irq_alloc_lock, flags);
+	if (hvirq_major)
+		major = hvirq_major;
+	else
+		major = 3;

-	irq_table[irq].in_use = 0;
+	hv_error = sun4v_hvapi_register(HV_GRP_INTR, major, &minor);
+	if (!hv_error)
+		hv_irq_version = major;
+	else
+		hv_irq_version = 1;

-	spin_unlock_irqrestore(&irq_alloc_lock, flags);
+	pr_info("SUN4V: Using IRQ API major %d, cookie only virqs %s\n",
+		hv_irq_version,
+		sun4v_cookie_only_virqs() ? "enabled" : "disabled");
+}
+
+/* This function is for the timer interrupt.*/
+int __init arch_probe_nr_irqs(void)
+{
+	return 1;
+}
+
+#define DEFAULT_NUM_IVECS	(0xfffU)
+static unsigned int nr_ivec = DEFAULT_NUM_IVECS;
+#define NUM_IVECS (nr_ivec)
+
+static unsigned int __init size_nr_ivec(void)
+{
+	if (tlb_type == hypervisor) {
+		switch (sun4v_chip_type) {
+		/* Athena's devhandle|devino is large.*/
+		case SUN4V_CHIP_SPARC64X:
+			nr_ivec = 0xffff;
+			break;
+		}
+	}
+	return nr_ivec;
+}
+
+struct irq_handler_data {
+	union {
+		struct {
+			unsigned int dev_handle;
+			unsigned int dev_ino;
+		};
+		unsigned long sysino;
+	};
+	struct ino_bucket bucket;
+	unsigned long	iclr;
+	unsigned long	imap;
+};
+
+static inline unsigned int irq_data_to_handle(struct irq_data *data)
+{
+	struct irq_handler_data *ihd = data->handler_data;
+
+	return ihd->dev_handle;
+}
+
+static inline unsigned int irq_data_to_ino(struct irq_data *data)
+{
+	struct irq_handler_data *ihd = data->handler_data;
+
+	return ihd->dev_ino;
+}
+
+static inline unsigned long irq_data_to_sysino(struct irq_data *data)
+{
+	struct irq_handler_data *ihd = data->handler_data;
+
+	return ihd->sysino;
+}
+
+void irq_free(unsigned int irq)
+{
+	void *data = irq_get_handler_data(irq);
+
+	kfree(data);
+	irq_set_handler_data(irq, NULL);
+	irq_free_descs(irq, 1);
+}
+
+unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino)
+{
+	int irq;
+
+	irq = __irq_alloc_descs(-1, 1, 1, numa_node_id(), NULL);
+	if (irq <= 0)
+		goto out;
+
+	return irq;
+out:
+	return 0;
+}
+
+static unsigned int cookie_exists(u32 devhandle, unsigned int devino)
+{
+	unsigned long hv_err, cookie;
+	struct ino_bucket *bucket;
+	unsigned int irq = 0U;
+
+	hv_err = sun4v_vintr_get_cookie(devhandle, devino, &cookie);
+	if (hv_err) {
+		pr_err("HV get cookie failed hv_err = %ld\n", hv_err);
+		goto out;
+	}
+
+	if (cookie & ((1UL << 63UL))) {
+		cookie = ~cookie;
+		bucket = (struct ino_bucket *) __va(cookie);
+		irq = bucket->__irq;
+	}
+out:
+	return irq;
+}
+
+static unsigned int sysino_exists(u32 devhandle, unsigned int devino)
+{
+	unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino);
+	struct ino_bucket *bucket;
+	unsigned int irq;
+
+	bucket = &ivector_table[sysino];
+	irq = bucket_get_irq(__pa(bucket));
+
+	return irq;
+}
+
+void ack_bad_irq(unsigned int irq)
+{
+	pr_crit("BAD IRQ ack %d\n", irq);
+}
+
+void irq_install_pre_handler(int irq,
+			     void (*func)(unsigned int, void *, void *),
+			     void *arg1, void *arg2)
+{
+	pr_warn("IRQ pre handler NOT supported.\n");
 }
-#endif

 /*
 * /proc/interrupts printing:
@ -206,15 +345,6 @@ static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid)
 	return tid;
 }

-struct irq_handler_data {
-	unsigned long	iclr;
-	unsigned long	imap;
-
-	void		(*pre_handler)(unsigned int, void *, void *);
-	void		*arg1;
-	void		*arg2;
-};
-
 #ifdef CONFIG_SMP
 static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity)
 {
@ -316,8 +446,8 @@ static void sun4u_irq_eoi(struct irq_data *data)

 static void sun4v_irq_enable(struct irq_data *data)
 {
-	unsigned int ino = irq_table[data->irq].dev_ino;
 	unsigned long cpuid = irq_choose_cpu(data->irq, data->affinity);
+	unsigned int ino = irq_data_to_sysino(data);
 	int err;

 	err = sun4v_intr_settarget(ino, cpuid);
@ -337,8 +467,8 @@ static void sun4v_irq_enable(struct irq_data *data)
 static int sun4v_set_affinity(struct irq_data *data,
 			       const struct cpumask *mask, bool force)
 {
-	unsigned int ino = irq_table[data->irq].dev_ino;
 	unsigned long cpuid = irq_choose_cpu(data->irq, mask);
+	unsigned int ino = irq_data_to_sysino(data);
 	int err;

 	err = sun4v_intr_settarget(ino, cpuid);
@ -351,7 +481,7 @@ static int sun4v_set_affinity(struct irq_data *data,

 static void sun4v_irq_disable(struct irq_data *data)
 {
-	unsigned int ino = irq_table[data->irq].dev_ino;
+	unsigned int ino = irq_data_to_sysino(data);
 	int err;

 	err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED);
@ -362,7 +492,7 @@ static void sun4v_irq_disable(struct irq_data *data)

 static void sun4v_irq_eoi(struct irq_data *data)
 {
-	unsigned int ino = irq_table[data->irq].dev_ino;
+	unsigned int ino = irq_data_to_sysino(data);
 	int err;

 	err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE);
@ -373,14 +503,13 @@ static void sun4v_irq_eoi(struct irq_data *data)

 static void sun4v_virq_enable(struct irq_data *data)
 {
-	unsigned long cpuid, dev_handle, dev_ino;
+	unsigned long dev_handle = irq_data_to_handle(data);
+	unsigned long dev_ino = irq_data_to_ino(data);
+	unsigned long cpuid;
 	int err;

 	cpuid = irq_choose_cpu(data->irq, data->affinity);

-	dev_handle = irq_table[data->irq].dev_handle;
-	dev_ino = irq_table[data->irq].dev_ino;
-
 	err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
 	if (err != HV_EOK)
 		printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
@ -403,14 +532,13 @@ static void sun4v_virq_enable(struct irq_data *data)
 static int sun4v_virt_set_affinity(struct irq_data *data,
 				    const struct cpumask *mask, bool force)
 {
-	unsigned long cpuid, dev_handle, dev_ino;
+	unsigned long dev_handle = irq_data_to_handle(data);
+	unsigned long dev_ino = irq_data_to_ino(data);
+	unsigned long cpuid;
 	int err;

 	cpuid = irq_choose_cpu(data->irq, mask);

-	dev_handle = irq_table[data->irq].dev_handle;
-	dev_ino = irq_table[data->irq].dev_ino;
-
 	err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
 	if (err != HV_EOK)
 		printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
@ -422,11 +550,10 @@ static int sun4v_virt_set_affinity(struct irq_data *data,

 static void sun4v_virq_disable(struct irq_data *data)
 {
-	unsigned long dev_handle, dev_ino;
+	unsigned long dev_handle = irq_data_to_handle(data);
+	unsigned long dev_ino = irq_data_to_ino(data);
 	int err;

-	dev_handle = irq_table[data->irq].dev_handle;
-	dev_ino = irq_table[data->irq].dev_ino;

 	err = sun4v_vintr_set_valid(dev_handle, dev_ino,
 				    HV_INTR_DISABLED);
@ -438,12 +565,10 @@ static void sun4v_virq_disable(struct irq_data *data)

 static void sun4v_virq_eoi(struct irq_data *data)
 {
-	unsigned long dev_handle, dev_ino;
+	unsigned long dev_handle = irq_data_to_handle(data);
+	unsigned long dev_ino = irq_data_to_ino(data);
 	int err;

-	dev_handle = irq_table[data->irq].dev_handle;
-	dev_ino = irq_table[data->irq].dev_ino;
-
 	err = sun4v_vintr_set_state(dev_handle, dev_ino,
 				    HV_INTR_STATE_IDLE);
 	if (err != HV_EOK)
@ -479,31 +604,10 @@ static struct irq_chip sun4v_virq = {
 	.flags			= IRQCHIP_EOI_IF_HANDLED,
 };

-static void pre_flow_handler(struct irq_data *d)
-{
-	struct irq_handler_data *handler_data = irq_data_get_irq_handler_data(d);
-	unsigned int ino = irq_table[d->irq].dev_ino;
-
-	handler_data->pre_handler(ino, handler_data->arg1, handler_data->arg2);
-}
-
-void irq_install_pre_handler(int irq,
-			     void (*func)(unsigned int, void *, void *),
-			     void *arg1, void *arg2)
-{
-	struct irq_handler_data *handler_data = irq_get_handler_data(irq);
-
-	handler_data->pre_handler = func;
-	handler_data->arg1 = arg1;
-	handler_data->arg2 = arg2;
-
-	__irq_set_preflow_handler(irq, pre_flow_handler);
-}
-
 unsigned int build_irq(int inofixup, unsigned long iclr, unsigned long imap)
 {
-	struct ino_bucket *bucket;
 	struct irq_handler_data *handler_data;
+	struct ino_bucket *bucket;
 	unsigned int irq;
 	int ino;

@ -537,119 +641,166 @@ out:
 	return irq;
 }

-static unsigned int sun4v_build_common(unsigned long sysino,
-				       struct irq_chip *chip)
+static unsigned int sun4v_build_common(u32 devhandle, unsigned int devino,
+		void (*handler_data_init)(struct irq_handler_data *data,
+		u32 devhandle, unsigned int devino),
+		struct irq_chip *chip)
 {
-	struct ino_bucket *bucket;
-	struct irq_handler_data *handler_data;
+	struct irq_handler_data *data;
 	unsigned int irq;

-	BUG_ON(tlb_type != hypervisor);
-
-	bucket = &ivector_table[sysino];
-	irq = bucket_get_irq(__pa(bucket));
-	if (!irq) {
-		irq = irq_alloc(0, sysino);
-		bucket_set_irq(__pa(bucket), irq);
-		irq_set_chip_and_handler_name(irq, chip, handle_fasteoi_irq,
-					      "IVEC");
-	}
-
-	handler_data = irq_get_handler_data(irq);
-	if (unlikely(handler_data))
+	irq = irq_alloc(devhandle, devino);
+	if (!irq)
 		goto out;

-	handler_data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
-	if (unlikely(!handler_data)) {
-		prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n");
-		prom_halt();
+	data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
+	if (unlikely(!data)) {
+		pr_err("IRQ handler data allocation failed.\n");
+		irq_free(irq);
+		irq = 0;
+		goto out;
 	}
-	irq_set_handler_data(irq, handler_data);

-	/* Catch accidental accesses to these things.  IMAP/ICLR handling
-	 * is done by hypervisor calls on sun4v platforms, not by direct
-	 * register accesses.
+	irq_set_handler_data(irq, data);
+	handler_data_init(data, devhandle, devino);
+	irq_set_chip_and_handler_name(irq, chip, handle_fasteoi_irq, "IVEC");
+	data->imap = ~0UL;
+	data->iclr = ~0UL;
+out:
+	return irq;
+}
+
+static unsigned long cookie_assign(unsigned int irq, u32 devhandle,
+		unsigned int devino)
+{
+	struct irq_handler_data *ihd = irq_get_handler_data(irq);
+	unsigned long hv_error, cookie;
+
+	/* handler_irq needs to find the irq. cookie is seen signed in
+	 * sun4v_dev_mondo and treated as a non ivector_table delivery.
 	 */
-	handler_data->imap = ~0UL;
-	handler_data->iclr = ~0UL;
+	ihd->bucket.__irq = irq;
+	cookie = ~__pa(&ihd->bucket);

+	hv_error = sun4v_vintr_set_cookie(devhandle, devino, cookie);
+	if (hv_error)
+		pr_err("HV vintr set cookie failed = %ld\n", hv_error);
+
+	return hv_error;
+}
+
+static void cookie_handler_data(struct irq_handler_data *data,
+				u32 devhandle, unsigned int devino)
+{
+	data->dev_handle = devhandle;
+	data->dev_ino = devino;
+}
+
+static unsigned int cookie_build_irq(u32 devhandle, unsigned int devino,
+				     struct irq_chip *chip)
+{
+	unsigned long hv_error;
+	unsigned int irq;
+
+	irq = sun4v_build_common(devhandle, devino, cookie_handler_data, chip);
+
+	hv_error = cookie_assign(irq, devhandle, devino);
+	if (hv_error) {
+		irq_free(irq);
+		irq = 0;
+	}
+
+	return irq;
+}
+
+static unsigned int sun4v_build_cookie(u32 devhandle, unsigned int devino)
+{
+	unsigned int irq;
+
+	irq = cookie_exists(devhandle, devino);
+	if (irq)
+		goto out;
+
+	irq = cookie_build_irq(devhandle, devino, &sun4v_virq);
+
+out:
+	return irq;
+}
+
+static void sysino_set_bucket(unsigned int irq)
+{
+	struct irq_handler_data *ihd = irq_get_handler_data(irq);
+	struct ino_bucket *bucket;
+	unsigned long sysino;
+
+	sysino = sun4v_devino_to_sysino(ihd->dev_handle, ihd->dev_ino);
+	BUG_ON(sysino >= nr_ivec);
+	bucket = &ivector_table[sysino];
+	bucket_set_irq(__pa(bucket), irq);
+}
+
+static void sysino_handler_data(struct irq_handler_data *data,
+				u32 devhandle, unsigned int devino)
+{
+	unsigned long sysino;
+
+	sysino = sun4v_devino_to_sysino(devhandle, devino);
+	data->sysino = sysino;
+}
+
+static unsigned int sysino_build_irq(u32 devhandle, unsigned int devino,
+				     struct irq_chip *chip)
+{
+	unsigned int irq;
+
+	irq = sun4v_build_common(devhandle, devino, sysino_handler_data, chip);
+	if (!irq)
+		goto out;
+
+	sysino_set_bucket(irq);
+out:
+	return irq;
+}
+
+static int sun4v_build_sysino(u32 devhandle, unsigned int devino)
+{
+	int irq;
+
+	irq = sysino_exists(devhandle, devino);
+	if (irq)
+		goto out;
+
+	irq = sysino_build_irq(devhandle, devino, &sun4v_irq);
 out:
 	return irq;
 }

 unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino)
 {
-	unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino);
-
-	return sun4v_build_common(sysino, &sun4v_irq);
-}
-
-unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino)
-{
-	struct irq_handler_data *handler_data;
-	unsigned long hv_err, cookie;
-	struct ino_bucket *bucket;
 	unsigned int irq;

-	bucket = kzalloc(sizeof(struct ino_bucket), GFP_ATOMIC);
-	if (unlikely(!bucket))
-		return 0;
-
-	/* The only reference we store to the IRQ bucket is
-	 * by physical address which kmemleak can't see, tell
-	 * it that this object explicitly is not a leak and
-	 * should be scanned.
-	 */
-	kmemleak_not_leak(bucket);
-
-	__flush_dcache_range((unsigned long) bucket,
-			     ((unsigned long) bucket +
-			      sizeof(struct ino_bucket)));
-
-	irq = irq_alloc(devhandle, devino);
-	bucket_set_irq(__pa(bucket), irq);
-
-	irq_set_chip_and_handler_name(irq, &sun4v_virq, handle_fasteoi_irq,
-				      "IVEC");
-
-	handler_data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
-	if (unlikely(!handler_data))
-		return 0;
-
-	/* In order to make the LDC channel startup sequence easier,
-	 * especially wrt. locking, we do not let request_irq() enable
-	 * the interrupt.
-	 */
-	irq_set_status_flags(irq, IRQ_NOAUTOEN);
-	irq_set_handler_data(irq, handler_data);
-
-	/* Catch accidental accesses to these things.  IMAP/ICLR handling
-	 * is done by hypervisor calls on sun4v platforms, not by direct
-	 * register accesses.
-	 */
-	handler_data->imap = ~0UL;
-	handler_data->iclr = ~0UL;
-
-	cookie = ~__pa(bucket);
-	hv_err = sun4v_vintr_set_cookie(devhandle, devino, cookie);
-	if (hv_err) {
-		prom_printf("IRQ: Fatal, cannot set cookie for [%x:%x] "
-			    "err=%lu\n", devhandle, devino, hv_err);
-		prom_halt();
-	}
+	if (sun4v_cookie_only_virqs())
+		irq = sun4v_build_cookie(devhandle, devino);
+	else
+		irq = sun4v_build_sysino(devhandle, devino);

 	return irq;
 }

-void ack_bad_irq(unsigned int irq)
+unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino)
 {
-	unsigned int ino = irq_table[irq].dev_ino;
+	int irq;

-	if (!ino)
-		ino = 0xdeadbeef;
+	irq = cookie_build_irq(devhandle, devino, &sun4v_virq);
+	if (!irq)
+		goto out;

-	printk(KERN_CRIT "Unexpected IRQ from ino[%x] irq[%u]\n",
-	       ino, irq);
+	/* This is borrowed from the original function.
+	 */
+	irq_set_status_flags(irq, IRQ_NOAUTOEN);
+
+out:
+	return irq;
 }

 void *hardirq_stack[NR_CPUS];
@ -720,9 +871,12 @@ void fixup_irqs(void)

 	for (irq = 0; irq < NR_IRQS; irq++) {
 		struct irq_desc *desc = irq_to_desc(irq);
-		struct irq_data *data = irq_desc_get_irq_data(desc);
+		struct irq_data *data;
 		unsigned long flags;

+		if (!desc)
+			continue;
+		data = irq_desc_get_irq_data(desc);
 		raw_spin_lock_irqsave(&desc->lock, flags);
 		if (desc->action && !irqd_is_per_cpu(data)) {
 			if (data->chip->irq_set_affinity)
@ -922,16 +1076,22 @@ static struct irqaction timer_irq_action = {
 	.name = "timer",
 };

-/* Only invoked on boot processor. */
-void __init init_IRQ(void)
+static void __init irq_ivector_init(void)
 {
-	unsigned long size;
+	unsigned long size, order;
+	unsigned int ivecs;

-	map_prom_timers();
-	kill_prom_timer();
+	/* If we are doing cookie only VIRQs then we do not need the ivector
+	 * table to process interrupts.
+	 */
+	if (sun4v_cookie_only_virqs())
+		return;

-	size = sizeof(struct ino_bucket) * NUM_IVECS;
-	ivector_table = kzalloc(size, GFP_KERNEL);
+	ivecs = size_nr_ivec();
+	size = sizeof(struct ino_bucket) * ivecs;
+	order = get_order(size);
+	ivector_table = (struct ino_bucket *)
+		__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
 	if (!ivector_table) {
 		prom_printf("Fatal error, cannot allocate ivector_table\n");
 		prom_halt();
@ -940,6 +1100,15 @@ void __init init_IRQ(void)
 			     ((unsigned long) ivector_table) + size);

 	ivector_table_pa = __pa(ivector_table);
+}
+
+/* Only invoked on boot processor.*/
+void __init init_IRQ(void)
+{
+	irq_init_hv();
+	irq_ivector_init();
+	map_prom_timers();
+	kill_prom_timer();

 	if (tlb_type == hypervisor)
 		sun4v_init_mondo_queues();
--- a/arch/sparc/kernel/ktlb.S
+++ b/arch/sparc/kernel/ktlb.S
@ -47,14 +47,6 @@ kvmap_itlb_vmalloc_addr:
 	KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath)

 	TSB_LOCK_TAG(%g1, %g2, %g7)
-
-	/* Load and check PTE.  */
-	ldxa		[%g5] ASI_PHYS_USE_EC, %g5
-	mov		1, %g7
-	sllx		%g7, TSB_TAG_INVALID_BIT, %g7
-	brgez,a,pn	%g5, kvmap_itlb_longpath
-	 TSB_STORE(%g1, %g7)
-
 	TSB_WRITE(%g1, %g5, %g6)

 	/* fallthrough to TLB load */
@ -118,6 +110,12 @@ kvmap_dtlb_obp:
 	ba,pt		%xcc, kvmap_dtlb_load
 	 nop

+kvmap_linear_early:
+	sethi		%hi(kern_linear_pte_xor), %g7
+	ldx		[%g7 + %lo(kern_linear_pte_xor)], %g2
+	ba,pt		%xcc, kvmap_dtlb_tsb4m_load
+	 xor		%g2, %g4, %g5
+
 	.align		32
 kvmap_dtlb_tsb4m_load:
 	TSB_LOCK_TAG(%g1, %g2, %g7)
@ -146,105 +144,17 @@ kvmap_dtlb_4v:
 	/* Correct TAG_TARGET is already in %g6, check 4mb TSB.  */
 	KERN_TSB4M_LOOKUP_TL1(%g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load)
 #endif
-	/* TSB entry address left in %g1, lookup linear PTE.
-	 * Must preserve %g1 and %g6 (TAG).
+	/* Linear mapping TSB lookup failed.  Fallthrough to kernel
+	 * page table based lookup.
 	 */
-kvmap_dtlb_tsb4m_miss:
-	/* Clear the PAGE_OFFSET top virtual bits, shift
-	 * down to get PFN, and make sure PFN is in range.
-	 */
-661:	sllx		%g4, 0, %g5
-	.section	.page_offset_shift_patch, "ax"
-	.word		661b
-	.previous
-
-	/* Check to see if we know about valid memory at the 4MB
-	 * chunk this physical address will reside within.
-	 */
-661:	srlx		%g5, MAX_PHYS_ADDRESS_BITS, %g2
-	.section	.page_offset_shift_patch, "ax"
-	.word		661b
-	.previous
-
-	brnz,pn		%g2, kvmap_dtlb_longpath
-	 nop
-
-	/* This unconditional branch and delay-slot nop gets patched
-	 * by the sethi sequence once the bitmap is properly setup.
-	 */
-	.globl		valid_addr_bitmap_insn
-valid_addr_bitmap_insn:
-	ba,pt		%xcc, 2f
-	 nop
-	.subsection	2
-	.globl		valid_addr_bitmap_patch
-valid_addr_bitmap_patch:
-	sethi		%hi(sparc64_valid_addr_bitmap), %g7
-	or		%g7, %lo(sparc64_valid_addr_bitmap), %g7
-	.previous
-
-661:	srlx		%g5, ILOG2_4MB, %g2
-	.section	.page_offset_shift_patch, "ax"
-	.word		661b
-	.previous
-
-	srlx		%g2, 6, %g5
-	and		%g2, 63, %g2
-	sllx		%g5, 3, %g5
-	ldx		[%g7 + %g5], %g5
-	mov		1, %g7
-	sllx		%g7, %g2, %g7
-	andcc		%g5, %g7, %g0
-	be,pn		%xcc, kvmap_dtlb_longpath
-
-2:	 sethi		%hi(kpte_linear_bitmap), %g2
-
-	/* Get the 256MB physical address index. */
-661:	sllx		%g4, 0, %g5
-	.section	.page_offset_shift_patch, "ax"
-	.word		661b
-	.previous
-
-	or		%g2, %lo(kpte_linear_bitmap), %g2
-
-661:	srlx		%g5, ILOG2_256MB, %g5
-	.section	.page_offset_shift_patch, "ax"
-	.word		661b
-	.previous
-
-	and		%g5, (32 - 1), %g7
-
-	/* Divide by 32 to get the offset into the bitmask.  */
-	srlx		%g5, 5, %g5
-	add		%g7, %g7, %g7
-	sllx		%g5, 3, %g5
-
-	/* kern_linear_pte_xor[(mask >> shift) & 3)] */
-	ldx		[%g2 + %g5], %g2
-	srlx		%g2, %g7, %g7
-	sethi		%hi(kern_linear_pte_xor), %g5
-	and		%g7, 3, %g7
-	or		%g5, %lo(kern_linear_pte_xor), %g5
-	sllx		%g7, 3, %g7
-	ldx		[%g5 + %g7], %g2
-
 	.globl		kvmap_linear_patch
 kvmap_linear_patch:
-	ba,pt		%xcc, kvmap_dtlb_tsb4m_load
-	 xor		%g2, %g4, %g5
+	ba,a,pt		%xcc, kvmap_linear_early

 kvmap_dtlb_vmalloc_addr:
 	KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath)

 	TSB_LOCK_TAG(%g1, %g2, %g7)
-
-	/* Load and check PTE.  */
-	ldxa		[%g5] ASI_PHYS_USE_EC, %g5
-	mov		1, %g7
-	sllx		%g7, TSB_TAG_INVALID_BIT, %g7
-	brgez,a,pn	%g5, kvmap_dtlb_longpath
-	 TSB_STORE(%g1, %g7)
-
 	TSB_WRITE(%g1, %g5, %g6)

 	/* fallthrough to TLB load */
@ -276,13 +186,8 @@ kvmap_dtlb_load:

 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 kvmap_vmemmap:
-	sub		%g4, %g5, %g5
-	srlx		%g5, ILOG2_4MB, %g5
-	sethi		%hi(vmemmap_table), %g1
-	sllx		%g5, 3, %g5
-	or		%g1, %lo(vmemmap_table), %g1
-	ba,pt		%xcc, kvmap_dtlb_load
-	 ldx		[%g1 + %g5], %g5
+	KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath)
+	ba,a,pt		%xcc, kvmap_dtlb_load
 #endif

 kvmap_dtlb_nonlinear:
@ -294,8 +199,8 @@ kvmap_dtlb_nonlinear:

 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 	/* Do not use the TSB for vmemmap.  */
-	mov		(VMEMMAP_BASE >> 40), %g5
-	sllx		%g5, 40, %g5
+	sethi		%hi(VMEMMAP_BASE), %g5
+	ldx		[%g5 + %lo(VMEMMAP_BASE)], %g5
 	cmp		%g4,%g5
 	bgeu,pn		%xcc, kvmap_vmemmap
 	 nop
@ -307,8 +212,8 @@ kvmap_dtlb_tsbmiss:
 	sethi		%hi(MODULES_VADDR), %g5
 	cmp		%g4, %g5
 	blu,pn		%xcc, kvmap_dtlb_longpath
-	 mov		(VMALLOC_END >> 40), %g5
-	sllx		%g5, 40, %g5
+	 sethi		%hi(VMALLOC_END), %g5
+	ldx		[%g5 + %lo(VMALLOC_END)], %g5
 	cmp		%g4, %g5
 	bgeu,pn		%xcc, kvmap_dtlb_longpath
 	 nop
--- a/arch/sparc/kernel/ldc.c
+++ b/arch/sparc/kernel/ldc.c
@ -1078,7 +1078,8 @@ static void ldc_iommu_release(struct ldc_channel *lp)

 struct ldc_channel *ldc_alloc(unsigned long id,
 			      const struct ldc_channel_config *cfgp,
-			      void *event_arg)
+			      void *event_arg,
+			      const char *name)
 {
 	struct ldc_channel *lp;
 	const struct ldc_mode_ops *mops;
@ -1093,6 +1094,8 @@ struct ldc_channel *ldc_alloc(unsigned long id,
 	err = -EINVAL;
 	if (!cfgp)
 		goto out_err;
+	if (!name)
+		goto out_err;

 	switch (cfgp->mode) {
 	case LDC_MODE_RAW:
@ -1185,6 +1188,21 @@ struct ldc_channel *ldc_alloc(unsigned long id,

 	INIT_HLIST_HEAD(&lp->mh_list);

+	snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
+	snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
+
+	err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
+			  lp->rx_irq_name, lp);
+	if (err)
+		goto out_free_txq;
+
+	err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
+			  lp->tx_irq_name, lp);
+	if (err) {
+		free_irq(lp->cfg.rx_irq, lp);
+		goto out_free_txq;
+	}
+
 	return lp;

 out_free_txq:
@ -1237,31 +1255,14 @@ EXPORT_SYMBOL(ldc_free);
 * state.  This does not initiate a handshake, ldc_connect() does
 * that.
 */
-int ldc_bind(struct ldc_channel *lp, const char *name)
+int ldc_bind(struct ldc_channel *lp)
 {
 	unsigned long hv_err, flags;
 	int err = -EINVAL;

-	if (!name ||
-	    (lp->state != LDC_STATE_INIT))
+	if (lp->state != LDC_STATE_INIT)
 		return -EINVAL;

-	snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
-	snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
-
-	err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
-			  lp->rx_irq_name, lp);
-	if (err)
-		return err;
-
-	err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
-			  lp->tx_irq_name, lp);
-	if (err) {
-		free_irq(lp->cfg.rx_irq, lp);
-		return err;
-	}
-
-
 	spin_lock_irqsave(&lp->lock, flags);

 	enable_irq(lp->cfg.rx_irq);
--- a/arch/sparc/kernel/pcr.c
+++ b/arch/sparc/kernel/pcr.c
@ -191,12 +191,41 @@ static const struct pcr_ops n4_pcr_ops = {
 	.pcr_nmi_disable	= PCR_N4_PICNPT,
 };

+static u64 n5_pcr_read(unsigned long reg_num)
+{
+	unsigned long val;
+
+	(void) sun4v_t5_get_perfreg(reg_num, &val);
+
+	return val;
+}
+
+static void n5_pcr_write(unsigned long reg_num, u64 val)
+{
+	(void) sun4v_t5_set_perfreg(reg_num, val);
+}
+
+static const struct pcr_ops n5_pcr_ops = {
+	.read_pcr		= n5_pcr_read,
+	.write_pcr		= n5_pcr_write,
+	.read_pic		= n4_pic_read,
+	.write_pic		= n4_pic_write,
+	.nmi_picl_value		= n4_picl_value,
+	.pcr_nmi_enable		= (PCR_N4_PICNPT | PCR_N4_STRACE |
+				   PCR_N4_UTRACE | PCR_N4_TOE |
+				   (26 << PCR_N4_SL_SHIFT)),
+	.pcr_nmi_disable	= PCR_N4_PICNPT,
+};
+
+
 static unsigned long perf_hsvc_group;
 static unsigned long perf_hsvc_major;
 static unsigned long perf_hsvc_minor;

 static int __init register_perf_hsvc(void)
 {
+	unsigned long hverror;
+
 	if (tlb_type == hypervisor) {
 		switch (sun4v_chip_type) {
 		case SUN4V_CHIP_NIAGARA1:
@ -215,6 +244,10 @@ static int __init register_perf_hsvc(void)
 			perf_hsvc_group = HV_GRP_VT_CPU;
 			break;

+		case SUN4V_CHIP_NIAGARA5:
+			perf_hsvc_group = HV_GRP_T5_CPU;
+			break;
+
 		default:
 			return -ENODEV;
 		}
@ -222,10 +255,12 @@ static int __init register_perf_hsvc(void)

 		perf_hsvc_major = 1;
 		perf_hsvc_minor = 0;
-		if (sun4v_hvapi_register(perf_hsvc_group,
-					 perf_hsvc_major,
-					 &perf_hsvc_minor)) {
-			printk("perfmon: Could not register hvapi.\n");
+		hverror = sun4v_hvapi_register(perf_hsvc_group,
+					       perf_hsvc_major,
+					       &perf_hsvc_minor);
+		if (hverror) {
+			pr_err("perfmon: Could not register hvapi(0x%lx).\n",
+			       hverror);
 			return -ENODEV;
 		}
 	}
@ -254,6 +289,10 @@ static int __init setup_sun4v_pcr_ops(void)
 		pcr_ops = &n4_pcr_ops;
 		break;

+	case SUN4V_CHIP_NIAGARA5:
+		pcr_ops = &n5_pcr_ops;
+		break;
+
 	default:
 		ret = -ENODEV;
 		break;
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@ -1662,7 +1662,8 @@ static bool __init supported_pmu(void)
 		sparc_pmu = &niagara2_pmu;
 		return true;
 	}
-	if (!strcmp(sparc_pmu_type, "niagara4")) {
+	if (!strcmp(sparc_pmu_type, "niagara4") ||
+	    !strcmp(sparc_pmu_type, "niagara5")) {
 		sparc_pmu = &niagara4_pmu;
 		return true;
 	}
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@ -30,6 +30,7 @@
 #include <linux/cpu.h>
 #include <linux/initrd.h>
 #include <linux/module.h>
+#include <linux/start_kernel.h>

 #include <asm/io.h>
 #include <asm/processor.h>
@ -174,7 +175,7 @@ char reboot_command[COMMAND_LINE_SIZE];

 static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 };

-void __init per_cpu_patch(void)
+static void __init per_cpu_patch(void)
 {
 	struct cpuid_patch_entry *p;
 	unsigned long ver;
@ -266,7 +267,7 @@ void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *start,
 	}
 }

-void __init sun4v_patch(void)
+static void __init sun4v_patch(void)
 {
 	extern void sun4v_hvapi_init(void);

@ -335,14 +336,25 @@ static void __init pause_patch(void)
 	}
 }

-#ifdef CONFIG_SMP
-void __init boot_cpu_id_too_large(int cpu)
+void __init start_early_boot(void)
 {
-	prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n",
-		    cpu, NR_CPUS);
-	prom_halt();
+	int cpu;
+
+	check_if_starfire();
+	per_cpu_patch();
+	sun4v_patch();
+
+	cpu = hard_smp_processor_id();
+	if (cpu >= NR_CPUS) {
+		prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n",
+			    cpu, NR_CPUS);
+		prom_halt();
+	}
+	current_thread_info()->cpu = cpu;
+
+	prom_init_report();
+	start_kernel();
 }
-#endif

 /* On Ultra, we support all of the v8 capabilities. */
 unsigned long sparc64_elf_hwcap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR |
@ -500,12 +512,16 @@ static void __init init_sparc64_elf_hwcap(void)
 		    sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
 		    sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
 		    sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
+		    sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
+		    sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
 		    sun4v_chip_type == SUN4V_CHIP_SPARC64X)
 			cap |= HWCAP_SPARC_BLKINIT;
 		if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
 		    sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
 		    sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
 		    sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
+		    sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
+		    sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
 		    sun4v_chip_type == SUN4V_CHIP_SPARC64X)
 			cap |= HWCAP_SPARC_N2;
 	}
@ -533,6 +549,8 @@ static void __init init_sparc64_elf_hwcap(void)
 			    sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
 			    sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
 			    sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
+			    sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
+			    sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
 			    sun4v_chip_type == SUN4V_CHIP_SPARC64X)
 				cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 |
 					AV_SPARC_ASI_BLK_INIT |
@ -540,6 +558,8 @@ static void __init init_sparc64_elf_hwcap(void)
 			if (sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
 			    sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
 			    sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
+			    sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
+			    sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
 			    sun4v_chip_type == SUN4V_CHIP_SPARC64X)
 				cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC |
 					AV_SPARC_FMAF);
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@ -1467,6 +1467,13 @@ static void __init pcpu_populate_pte(unsigned long addr)
 	pud_t *pud;
 	pmd_t *pmd;

+	if (pgd_none(*pgd)) {
+		pud_t *new;
+
+		new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+		pgd_populate(&init_mm, pgd, new);
+	}
+
 	pud = pud_offset(pgd, addr);
 	if (pud_none(*pud)) {
 		pmd_t *new;
--- a/arch/sparc/kernel/sun4v_tlb_miss.S
+++ b/arch/sparc/kernel/sun4v_tlb_miss.S
@ -195,6 +195,11 @@ sun4v_tsb_miss_common:
 	 ldx	[%g2 + TRAP_PER_CPU_PGD_PADDR], %g7

 sun4v_itlb_error:
+	rdpr	%tl, %g1
+	cmp	%g1, 1
+	ble,pt	%icc, sun4v_bad_ra
+	 or	%g0, FAULT_CODE_BAD_RA | FAULT_CODE_ITLB, %g1
+
 	sethi	%hi(sun4v_err_itlb_vaddr), %g1
 	stx	%g4, [%g1 + %lo(sun4v_err_itlb_vaddr)]
 	sethi	%hi(sun4v_err_itlb_ctx), %g1
@ -206,15 +211,10 @@ sun4v_itlb_error:
 	sethi	%hi(sun4v_err_itlb_error), %g1
 	stx	%o0, [%g1 + %lo(sun4v_err_itlb_error)]

+	sethi	%hi(1f), %g7
 	rdpr	%tl, %g4
-	cmp	%g4, 1
-	ble,pt	%icc, 1f
-	 sethi	%hi(2f), %g7
 	ba,pt	%xcc, etraptl1
-	 or	%g7, %lo(2f), %g7
-
-1:	ba,pt	%xcc, etrap
-2:	 or	%g7, %lo(2b), %g7
+1:	 or	%g7, %lo(1f), %g7
 	mov	%l4, %o1
 	call	sun4v_itlb_error_report
 	 add	%sp, PTREGS_OFF, %o0
@ -222,6 +222,11 @@ sun4v_itlb_error:
 	/* NOTREACHED */

 sun4v_dtlb_error:
+	rdpr	%tl, %g1
+	cmp	%g1, 1
+	ble,pt	%icc, sun4v_bad_ra
+	 or	%g0, FAULT_CODE_BAD_RA | FAULT_CODE_DTLB, %g1
+
 	sethi	%hi(sun4v_err_dtlb_vaddr), %g1
 	stx	%g4, [%g1 + %lo(sun4v_err_dtlb_vaddr)]
 	sethi	%hi(sun4v_err_dtlb_ctx), %g1
@ -233,21 +238,23 @@ sun4v_dtlb_error:
 	sethi	%hi(sun4v_err_dtlb_error), %g1
 	stx	%o0, [%g1 + %lo(sun4v_err_dtlb_error)]

+	sethi	%hi(1f), %g7
 	rdpr	%tl, %g4
-	cmp	%g4, 1
-	ble,pt	%icc, 1f
-	 sethi	%hi(2f), %g7
 	ba,pt	%xcc, etraptl1
-	 or	%g7, %lo(2f), %g7
-
-1:	ba,pt	%xcc, etrap
-2:	 or	%g7, %lo(2b), %g7
+1:	 or	%g7, %lo(1f), %g7
 	mov	%l4, %o1
 	call	sun4v_dtlb_error_report
 	 add	%sp, PTREGS_OFF, %o0

 	/* NOTREACHED */

+sun4v_bad_ra:
+	or	%g0, %g4, %g5
+	ba,pt	%xcc, sparc64_realfault_common
+	 or	%g1, %g0, %g4
+
+	/* NOTREACHED */
+
 	/* Instruction Access Exception, tl0. */
 sun4v_iacc:
 	ldxa	[%g0] ASI_SCRATCHPAD, %g2
--- a/arch/sparc/kernel/trampoline_64.S
+++ b/arch/sparc/kernel/trampoline_64.S
@ -109,10 +109,13 @@ startup_continue:
 	brnz,pn		%g1, 1b
 	 nop

-	sethi		%hi(p1275buf), %g2
-	or		%g2, %lo(p1275buf), %g2
-	ldx		[%g2 + 0x10], %l2
-	add		%l2, -(192 + 128), %sp
+	/* Get onto temporary stack which will be in the locked
+	 * kernel image.
+	 */
+	sethi		%hi(tramp_stack), %g1
+	or		%g1, %lo(tramp_stack), %g1
+	add		%g1, TRAMP_STACK_SIZE, %g1
+	sub		%g1, STACKFRAME_SZ + STACK_BIAS + 256, %sp
 	flushw

 	/* Setup the loop variables:
@ -394,7 +397,6 @@ after_lock_tlb:
 	sllx		%g5, THREAD_SHIFT, %g5
 	sub		%g5, (STACKFRAME_SZ + STACK_BIAS), %g5
 	add		%g6, %g5, %sp
-	mov		0, %fp

 	rdpr		%pstate, %o1
 	or		%o1, PSTATE_IE, %o1
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@ -2104,6 +2104,11 @@ void sun4v_nonresum_overflow(struct pt_regs *regs)
 	atomic_inc(&sun4v_nonresum_oflow_cnt);
 }

+static void sun4v_tlb_error(struct pt_regs *regs)
+{
+	die_if_kernel("TLB/TSB error", regs);
+}
+
 unsigned long sun4v_err_itlb_vaddr;
 unsigned long sun4v_err_itlb_ctx;
 unsigned long sun4v_err_itlb_pte;
@ -2111,8 +2116,7 @@ unsigned long sun4v_err_itlb_error;

 void sun4v_itlb_error_report(struct pt_regs *regs, int tl)
 {
-	if (tl > 1)
-		dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));

 	printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n",
 	       regs->tpc, tl);
@ -2125,7 +2129,7 @@ void sun4v_itlb_error_report(struct pt_regs *regs, int tl)
 	       sun4v_err_itlb_vaddr, sun4v_err_itlb_ctx,
 	       sun4v_err_itlb_pte, sun4v_err_itlb_error);

-	prom_halt();
+	sun4v_tlb_error(regs);
 }

 unsigned long sun4v_err_dtlb_vaddr;
@ -2135,8 +2139,7 @@ unsigned long sun4v_err_dtlb_error;

 void sun4v_dtlb_error_report(struct pt_regs *regs, int tl)
 {
-	if (tl > 1)
-		dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+	dump_tl1_traplog((struct tl1_traplog *)(regs + 1));

 	printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n",
 	       regs->tpc, tl);
@ -2149,7 +2152,7 @@ void sun4v_dtlb_error_report(struct pt_regs *regs, int tl)
 	       sun4v_err_dtlb_vaddr, sun4v_err_dtlb_ctx,
 	       sun4v_err_dtlb_pte, sun4v_err_dtlb_error);

-	prom_halt();
+	sun4v_tlb_error(regs);
 }

 void hypervisor_tlbop_error(unsigned long err, unsigned long op)
--- a/arch/sparc/kernel/tsb.S
+++ b/arch/sparc/kernel/tsb.S
@ -162,10 +162,10 @@ tsb_miss_page_table_walk_sun4v_fastpath:
 	nop
 	.previous

-	rdpr	%tl, %g3
-	cmp	%g3, 1
+	rdpr	%tl, %g7
+	cmp	%g7, 1
 	bne,pn	%xcc, winfix_trampoline
-	 nop
+	 mov	%g3, %g4
 	ba,pt	%xcc, etrap
 	 rd	%pc, %g7
 	call	hugetlb_setup
--- a/arch/sparc/kernel/viohs.c
+++ b/arch/sparc/kernel/viohs.c
@ -714,7 +714,7 @@ int vio_ldc_alloc(struct vio_driver_state *vio,
 	cfg.tx_irq = vio->vdev->tx_irq;
 	cfg.rx_irq = vio->vdev->rx_irq;

-	lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg);
+	lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg, vio->name);
 	if (IS_ERR(lp))
 		return PTR_ERR(lp);

@ -746,7 +746,7 @@ void vio_port_up(struct vio_driver_state *vio)

 	err = 0;
 	if (state == LDC_STATE_INIT) {
-		err = ldc_bind(vio->lp, vio->name);
+		err = ldc_bind(vio->lp);
 		if (err)
 			printk(KERN_WARNING "%s: Port %lu bind failed, "
 			       "err=%d\n",
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@ -35,8 +35,9 @@ jiffies = jiffies_64;

 SECTIONS
 {
-	/* swapper_low_pmd_dir is sparc64 only */
-	swapper_low_pmd_dir = 0x0000000000402000;
+#ifdef CONFIG_SPARC64
+	swapper_pg_dir = 0x0000000000402000;
+#endif
 	. = INITIAL_ADDRESS;
 	.text TEXTSTART :
 	{
@ -122,11 +123,6 @@ SECTIONS
 		*(.swapper_4m_tsb_phys_patch)
 		__swapper_4m_tsb_phys_patch_end = .;
 	}
-	.page_offset_shift_patch : {
-		__page_offset_shift_patch = .;
-		*(.page_offset_shift_patch)
-		__page_offset_shift_patch_end = .;
-	}
 	.popc_3insn_patch : {
 		__popc_3insn_patch = .;
 		*(.popc_3insn_patch)
--- a/arch/sparc/lib/NG4memcpy.S
+++ b/arch/sparc/lib/NG4memcpy.S
@ -41,6 +41,10 @@
 #endif
 #endif

+#if !defined(EX_LD) && !defined(EX_ST)
+#define NON_USER_COPY
+#endif
+
 #ifndef EX_LD
 #define EX_LD(x)	x
 #endif
@ -197,9 +201,13 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	 mov		EX_RETVAL(%o3), %o0

 .Llarge_src_unaligned:
+#ifdef NON_USER_COPY
+	VISEntryHalfFast(.Lmedium_vis_entry_fail)
+#else
+	VISEntryHalf
+#endif
 	andn		%o2, 0x3f, %o4
 	sub		%o2, %o4, %o2
-	VISEntryHalf
 	alignaddr	%o1, %g0, %g1
 	add		%o1, %o4, %o1
 	EX_LD(LOAD(ldd, %g1 + 0x00, %f0))
@ -240,6 +248,10 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	 nop
 	ba,a,pt		%icc, .Lmedium_unaligned

+#ifdef NON_USER_COPY
+.Lmedium_vis_entry_fail:
+	 or		%o0, %o1, %g2
+#endif
 .Lmedium:
 	LOAD(prefetch, %o1 + 0x40, #n_reads_strong)
 	andcc		%g2, 0x7, %g0
--- a/arch/sparc/lib/memset.S
+++ b/arch/sparc/lib/memset.S
@ -3,8 +3,9 @@
 * Copyright (C) 1996,1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
 *
- * Returns 0, if ok, and number of bytes not yet set if exception
- * occurs and we were called as clear_user.
+ * Calls to memset returns initial %o0. Calls to bzero returns 0, if ok, and
+ * number of bytes not yet set if exception occurs and we were called as
+ * clear_user.
 */

 #include <asm/ptrace.h>
@ -65,6 +66,8 @@ __bzero_begin:
 	.globl	__memset_start, __memset_end
 __memset_start:
 memset:
+	mov	%o0, %g1
+	mov	1, %g4
 	and	%o1, 0xff, %g3
 	sll	%g3, 8, %g2
 	or	%g3, %g2, %g3
@ -89,6 +92,7 @@ memset:
 	 sub	%o0, %o2, %o0

 __bzero:
+	clr	%g4
 	mov	%g0, %g3
 1:
 	cmp	%o1, 7
@ -151,8 +155,8 @@ __bzero:
 	bne,a	8f
 	 EX(stb	%g3, [%o0], and %o1, 1)
 8:
-	retl
-	 clr	%o0
+	b	0f
+	 nop
 7:
 	be	13b
 	 orcc	%o1, 0, %g0
@ -164,6 +168,12 @@ __bzero:
 	bne	8b
 	 EX(stb	%g3, [%o0 - 1], add %o1, 1)
 0:
+	andcc	%g4, 1, %g0
+	be	5f
+	 nop
+	retl
+	 mov	%g1, %o0
+5:
 	retl
 	 clr	%o0
 __memset_end:
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@ -346,6 +346,9 @@ retry:
 		down_read(&mm->mmap_sem);
 	}

+	if (fault_code & FAULT_CODE_BAD_RA)
+		goto do_sigbus;
+
 	vma = find_vma(mm, address);
 	if (!vma)
 		goto bad_area;
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@ -160,6 +160,36 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
 	return 1;
 }

+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			  struct page **pages)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr, len, end;
+	unsigned long next, flags;
+	pgd_t *pgdp;
+	int nr = 0;
+
+	start &= PAGE_MASK;
+	addr = start;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+
+	local_irq_save(flags);
+	pgdp = pgd_offset(mm, addr);
+	do {
+		pgd_t pgd = *pgdp;
+
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(pgd))
+			break;
+		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+			break;
+	} while (pgdp++, addr = next, addr != end);
+	local_irq_restore(flags);
+
+	return nr;
+}
+
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages)
 {
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@ -75,7 +75,6 @@ unsigned long kern_linear_pte_xor[4] __read_mostly;
 * 'cpu' properties, but we need to have this table setup before the
 * MDESC is initialized.
 */
-unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];

 #ifndef CONFIG_DEBUG_PAGEALLOC
 /* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings.
@ -84,10 +83,11 @@ unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
 */
 extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
 #endif
+extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];

 static unsigned long cpu_pgsz_mask;

-#define MAX_BANKS	32
+#define MAX_BANKS	1024

 static struct linux_prom64_registers pavail[MAX_BANKS];
 static int pavail_ents;
@ -165,10 +165,6 @@ static void __init read_obp_memory(const char *property,
 	     cmp_p64, NULL);
 }

-unsigned long sparc64_valid_addr_bitmap[VALID_ADDR_BITMAP_BYTES /
-					sizeof(unsigned long)];
-EXPORT_SYMBOL(sparc64_valid_addr_bitmap);
-
 /* Kernel physical address base and size in bytes.  */
 unsigned long kern_base __read_mostly;
 unsigned long kern_size __read_mostly;
@ -840,7 +836,10 @@ static int find_node(unsigned long addr)
 		if ((addr & p->mask) == p->val)
 			return i;
 	}
-	return -1;
+	/* The following condition has been observed on LDOM guests.*/
+	WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node"
+		" rule. Some physical memory will be owned by node 0.");
+	return 0;
 }

 static u64 memblock_nid_range(u64 start, u64 end, int *nid)
@ -1366,9 +1365,144 @@ static unsigned long __init bootmem_init(unsigned long phys_base)
 static struct linux_prom64_registers pall[MAX_BANKS] __initdata;
 static int pall_ents __initdata;

-#ifdef CONFIG_DEBUG_PAGEALLOC
+static unsigned long max_phys_bits = 40;
+
+bool kern_addr_valid(unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	if ((long)addr < 0L) {
+		unsigned long pa = __pa(addr);
+
+		if ((addr >> max_phys_bits) != 0UL)
+			return false;
+
+		return pfn_valid(pa >> PAGE_SHIFT);
+	}
+
+	if (addr >= (unsigned long) KERNBASE &&
+	    addr < (unsigned long)&_end)
+		return true;
+
+	pgd = pgd_offset_k(addr);
+	if (pgd_none(*pgd))
+		return 0;
+
+	pud = pud_offset(pgd, addr);
+	if (pud_none(*pud))
+		return 0;
+
+	if (pud_large(*pud))
+		return pfn_valid(pud_pfn(*pud));
+
+	pmd = pmd_offset(pud, addr);
+	if (pmd_none(*pmd))
+		return 0;
+
+	if (pmd_large(*pmd))
+		return pfn_valid(pmd_pfn(*pmd));
+
+	pte = pte_offset_kernel(pmd, addr);
+	if (pte_none(*pte))
+		return 0;
+
+	return pfn_valid(pte_pfn(*pte));
+}
+EXPORT_SYMBOL(kern_addr_valid);
+
+static unsigned long __ref kernel_map_hugepud(unsigned long vstart,
+					      unsigned long vend,
+					      pud_t *pud)
+{
+	const unsigned long mask16gb = (1UL << 34) - 1UL;
+	u64 pte_val = vstart;
+
+	/* Each PUD is 8GB */
+	if ((vstart & mask16gb) ||
+	    (vend - vstart <= mask16gb)) {
+		pte_val ^= kern_linear_pte_xor[2];
+		pud_val(*pud) = pte_val | _PAGE_PUD_HUGE;
+
+		return vstart + PUD_SIZE;
+	}
+
+	pte_val ^= kern_linear_pte_xor[3];
+	pte_val |= _PAGE_PUD_HUGE;
+
+	vend = vstart + mask16gb + 1UL;
+	while (vstart < vend) {
+		pud_val(*pud) = pte_val;
+
+		pte_val += PUD_SIZE;
+		vstart += PUD_SIZE;
+		pud++;
+	}
+	return vstart;
+}
+
+static bool kernel_can_map_hugepud(unsigned long vstart, unsigned long vend,
+				   bool guard)
+{
+	if (guard && !(vstart & ~PUD_MASK) && (vend - vstart) >= PUD_SIZE)
+		return true;
+
+	return false;
+}
+
+static unsigned long __ref kernel_map_hugepmd(unsigned long vstart,
+					      unsigned long vend,
+					      pmd_t *pmd)
+{
+	const unsigned long mask256mb = (1UL << 28) - 1UL;
+	const unsigned long mask2gb = (1UL << 31) - 1UL;
+	u64 pte_val = vstart;
+
+	/* Each PMD is 8MB */
+	if ((vstart & mask256mb) ||
+	    (vend - vstart <= mask256mb)) {
+		pte_val ^= kern_linear_pte_xor[0];
+		pmd_val(*pmd) = pte_val | _PAGE_PMD_HUGE;
+
+		return vstart + PMD_SIZE;
+	}
+
+	if ((vstart & mask2gb) ||
+	    (vend - vstart <= mask2gb)) {
+		pte_val ^= kern_linear_pte_xor[1];
+		pte_val |= _PAGE_PMD_HUGE;
+		vend = vstart + mask256mb + 1UL;
+	} else {
+		pte_val ^= kern_linear_pte_xor[2];
+		pte_val |= _PAGE_PMD_HUGE;
+		vend = vstart + mask2gb + 1UL;
+	}
+
+	while (vstart < vend) {
+		pmd_val(*pmd) = pte_val;
+
+		pte_val += PMD_SIZE;
+		vstart += PMD_SIZE;
+		pmd++;
+	}
+
+	return vstart;
+}
+
+static bool kernel_can_map_hugepmd(unsigned long vstart, unsigned long vend,
+				   bool guard)
+{
+	if (guard && !(vstart & ~PMD_MASK) && (vend - vstart) >= PMD_SIZE)
+		return true;
+
+	return false;
+}
+
 static unsigned long __ref kernel_map_range(unsigned long pstart,
-					    unsigned long pend, pgprot_t prot)
+					    unsigned long pend, pgprot_t prot,
+					    bool use_huge)
 {
 	unsigned long vstart = PAGE_OFFSET + pstart;
 	unsigned long vend = PAGE_OFFSET + pend;
@ -1387,19 +1521,34 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
 		pmd_t *pmd;
 		pte_t *pte;

+		if (pgd_none(*pgd)) {
+			pud_t *new;
+
+			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+			alloc_bytes += PAGE_SIZE;
+			pgd_populate(&init_mm, pgd, new);
+		}
 		pud = pud_offset(pgd, vstart);
 		if (pud_none(*pud)) {
 			pmd_t *new;

+			if (kernel_can_map_hugepud(vstart, vend, use_huge)) {
+				vstart = kernel_map_hugepud(vstart, vend, pud);
+				continue;
+			}
 			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
 			alloc_bytes += PAGE_SIZE;
 			pud_populate(&init_mm, pud, new);
 		}

 		pmd = pmd_offset(pud, vstart);
-		if (!pmd_present(*pmd)) {
+		if (pmd_none(*pmd)) {
 			pte_t *new;

+			if (kernel_can_map_hugepmd(vstart, vend, use_huge)) {
+				vstart = kernel_map_hugepmd(vstart, vend, pmd);
+				continue;
+			}
 			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
 			alloc_bytes += PAGE_SIZE;
 			pmd_populate_kernel(&init_mm, pmd, new);
@ -1422,100 +1571,34 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
 	return alloc_bytes;
 }

+static void __init flush_all_kernel_tsbs(void)
+{
+	int i;
+
+	for (i = 0; i < KERNEL_TSB_NENTRIES; i++) {
+		struct tsb *ent = &swapper_tsb[i];
+
+		ent->tag = (1UL << TSB_TAG_INVALID_BIT);
+	}
+#ifndef CONFIG_DEBUG_PAGEALLOC
+	for (i = 0; i < KERNEL_TSB4M_NENTRIES; i++) {
+		struct tsb *ent = &swapper_4m_tsb[i];
+
+		ent->tag = (1UL << TSB_TAG_INVALID_BIT);
+	}
+#endif
+}
+
 extern unsigned int kvmap_linear_patch[1];
-#endif /* CONFIG_DEBUG_PAGEALLOC */
-
-static void __init kpte_set_val(unsigned long index, unsigned long val)
-{
-	unsigned long *ptr = kpte_linear_bitmap;
-
-	val <<= ((index % (BITS_PER_LONG / 2)) * 2);
-	ptr += (index / (BITS_PER_LONG / 2));
-
-	*ptr |= val;
-}
-
-static const unsigned long kpte_shift_min = 28; /* 256MB */
-static const unsigned long kpte_shift_max = 34; /* 16GB */
-static const unsigned long kpte_shift_incr = 3;
-
-static unsigned long kpte_mark_using_shift(unsigned long start, unsigned long end,
-					   unsigned long shift)
-{
-	unsigned long size = (1UL << shift);
-	unsigned long mask = (size - 1UL);
-	unsigned long remains = end - start;
-	unsigned long val;
-
-	if (remains < size || (start & mask))
-		return start;
-
-	/* VAL maps:
-	 *
-	 *	shift 28 --> kern_linear_pte_xor index 1
-	 *	shift 31 --> kern_linear_pte_xor index 2
-	 *	shift 34 --> kern_linear_pte_xor index 3
-	 */
-	val = ((shift - kpte_shift_min) / kpte_shift_incr) + 1;
-
-	remains &= ~mask;
-	if (shift != kpte_shift_max)
-		remains = size;
-
-	while (remains) {
-		unsigned long index = start >> kpte_shift_min;
-
-		kpte_set_val(index, val);
-
-		start += 1UL << kpte_shift_min;
-		remains -= 1UL << kpte_shift_min;
-	}
-
-	return start;
-}
-
-static void __init mark_kpte_bitmap(unsigned long start, unsigned long end)
-{
-	unsigned long smallest_size, smallest_mask;
-	unsigned long s;
-
-	smallest_size = (1UL << kpte_shift_min);
-	smallest_mask = (smallest_size - 1UL);
-
-	while (start < end) {
-		unsigned long orig_start = start;
-
-		for (s = kpte_shift_max; s >= kpte_shift_min; s -= kpte_shift_incr) {
-			start = kpte_mark_using_shift(start, end, s);
-
-			if (start != orig_start)
-				break;
-		}
-
-		if (start == orig_start)
-			start = (start + smallest_size) & ~smallest_mask;
-	}
-}
-
-static void __init init_kpte_bitmap(void)
-{
-	unsigned long i;
-
-	for (i = 0; i < pall_ents; i++) {
-		unsigned long phys_start, phys_end;
-
-		phys_start = pall[i].phys_addr;
-		phys_end = phys_start + pall[i].reg_size;
-
-		mark_kpte_bitmap(phys_start, phys_end);
-	}
-}

 static void __init kernel_physical_mapping_init(void)
 {
-#ifdef CONFIG_DEBUG_PAGEALLOC
 	unsigned long i, mem_alloced = 0UL;
+	bool use_huge = true;

+#ifdef CONFIG_DEBUG_PAGEALLOC
+	use_huge = false;
+#endif
 	for (i = 0; i < pall_ents; i++) {
 		unsigned long phys_start, phys_end;

@ -1523,7 +1606,7 @@ static void __init kernel_physical_mapping_init(void)
 		phys_end = phys_start + pall[i].reg_size;

 		mem_alloced += kernel_map_range(phys_start, phys_end,
-						PAGE_KERNEL);
+						PAGE_KERNEL, use_huge);
 	}

 	printk("Allocated %ld bytes for kernel page tables.\n",
@ -1532,8 +1615,9 @@ static void __init kernel_physical_mapping_init(void)
 	kvmap_linear_patch[0] = 0x01000000; /* nop */
 	flushi(&kvmap_linear_patch[0]);

+	flush_all_kernel_tsbs();
+
 	__flush_tlb_all();
-#endif
 }

 #ifdef CONFIG_DEBUG_PAGEALLOC
@ -1543,7 +1627,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 	unsigned long phys_end = phys_start + (numpages * PAGE_SIZE);

 	kernel_map_range(phys_start, phys_end,
-			 (enable ? PAGE_KERNEL : __pgprot(0)));
+			 (enable ? PAGE_KERNEL : __pgprot(0)), false);

 	flush_tsb_kernel_range(PAGE_OFFSET + phys_start,
 			       PAGE_OFFSET + phys_end);
@ -1571,76 +1655,56 @@ unsigned long __init find_ecache_flush_span(unsigned long size)
 unsigned long PAGE_OFFSET;
 EXPORT_SYMBOL(PAGE_OFFSET);

-static void __init page_offset_shift_patch_one(unsigned int *insn, unsigned long phys_bits)
-{
-	unsigned long final_shift;
-	unsigned int val = *insn;
-	unsigned int cnt;
+unsigned long VMALLOC_END   = 0x0000010000000000UL;
+EXPORT_SYMBOL(VMALLOC_END);

-	/* We are patching in ilog2(max_supported_phys_address), and
-	 * we are doing so in a manner similar to a relocation addend.
-	 * That is, we are adding the shift value to whatever value
-	 * is in the shift instruction count field already.
-	 */
-	cnt = (val & 0x3f);
-	val &= ~0x3f;
-
-	/* If we are trying to shift >= 64 bits, clear the destination
-	 * register.  This can happen when phys_bits ends up being equal
-	 * to MAX_PHYS_ADDRESS_BITS.
-	 */
-	final_shift = (cnt + (64 - phys_bits));
-	if (final_shift >= 64) {
-		unsigned int rd = (val >> 25) & 0x1f;
-
-		val = 0x80100000 | (rd << 25);
-	} else {
-		val |= final_shift;
-	}
-	*insn = val;
-
-	__asm__ __volatile__("flush	%0"
-			     : /* no outputs */
-			     : "r" (insn));
-}
-
-static void __init page_offset_shift_patch(unsigned long phys_bits)
-{
-	extern unsigned int __page_offset_shift_patch;
-	extern unsigned int __page_offset_shift_patch_end;
-	unsigned int *p;
-
-	p = &__page_offset_shift_patch;
-	while (p < &__page_offset_shift_patch_end) {
-		unsigned int *insn = (unsigned int *)(unsigned long)*p;
-
-		page_offset_shift_patch_one(insn, phys_bits);
-
-		p++;
-	}
-}
+unsigned long sparc64_va_hole_top =    0xfffff80000000000UL;
+unsigned long sparc64_va_hole_bottom = 0x0000080000000000UL;

 static void __init setup_page_offset(void)
 {
-	unsigned long max_phys_bits = 40;
-
 	if (tlb_type == cheetah || tlb_type == cheetah_plus) {
+		/* Cheetah/Panther support a full 64-bit virtual
+		 * address, so we can use all that our page tables
+		 * support.
+		 */
+		sparc64_va_hole_top =    0xfff0000000000000UL;
+		sparc64_va_hole_bottom = 0x0010000000000000UL;
+
 		max_phys_bits = 42;
 	} else if (tlb_type == hypervisor) {
 		switch (sun4v_chip_type) {
 		case SUN4V_CHIP_NIAGARA1:
 		case SUN4V_CHIP_NIAGARA2:
+			/* T1 and T2 support 48-bit virtual addresses.  */
+			sparc64_va_hole_top =    0xffff800000000000UL;
+			sparc64_va_hole_bottom = 0x0000800000000000UL;
+
 			max_phys_bits = 39;
 			break;
 		case SUN4V_CHIP_NIAGARA3:
+			/* T3 supports 48-bit virtual addresses.  */
+			sparc64_va_hole_top =    0xffff800000000000UL;
+			sparc64_va_hole_bottom = 0x0000800000000000UL;
+
 			max_phys_bits = 43;
 			break;
 		case SUN4V_CHIP_NIAGARA4:
 		case SUN4V_CHIP_NIAGARA5:
 		case SUN4V_CHIP_SPARC64X:
-		default:
+		case SUN4V_CHIP_SPARC_M6:
+			/* T4 and later support 52-bit virtual addresses.  */
+			sparc64_va_hole_top =    0xfff8000000000000UL;
+			sparc64_va_hole_bottom = 0x0008000000000000UL;
 			max_phys_bits = 47;
 			break;
+		case SUN4V_CHIP_SPARC_M7:
+		default:
+			/* M7 and later support 52-bit virtual addresses.  */
+			sparc64_va_hole_top =    0xfff8000000000000UL;
+			sparc64_va_hole_bottom = 0x0008000000000000UL;
+			max_phys_bits = 49;
+			break;
 		}
 	}

@ -1650,12 +1714,16 @@ static void __init setup_page_offset(void)
 		prom_halt();
 	}

-	PAGE_OFFSET = PAGE_OFFSET_BY_BITS(max_phys_bits);
+	PAGE_OFFSET = sparc64_va_hole_top;
+	VMALLOC_END = ((sparc64_va_hole_bottom >> 1) +
+		       (sparc64_va_hole_bottom >> 2));

-	pr_info("PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n",
+	pr_info("MM: PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n",
 		PAGE_OFFSET, max_phys_bits);
-
-	page_offset_shift_patch(max_phys_bits);
+	pr_info("MM: VMALLOC [0x%016lx --> 0x%016lx]\n",
+		VMALLOC_START, VMALLOC_END);
+	pr_info("MM: VMEMMAP [0x%016lx --> 0x%016lx]\n",
+		VMEMMAP_BASE, VMEMMAP_BASE << 1);
 }

 static void __init tsb_phys_patch(void)
@ -1700,21 +1768,42 @@ static void __init tsb_phys_patch(void)
 #define NUM_KTSB_DESCR	1
 #endif
 static struct hv_tsb_descr ktsb_descr[NUM_KTSB_DESCR];
-extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
+
+/* The swapper TSBs are loaded with a base sequence of:
+ *
+ *	sethi	%uhi(SYMBOL), REG1
+ *	sethi	%hi(SYMBOL), REG2
+ *	or	REG1, %ulo(SYMBOL), REG1
+ *	or	REG2, %lo(SYMBOL), REG2
+ *	sllx	REG1, 32, REG1
+ *	or	REG1, REG2, REG1
+ *
+ * When we use physical addressing for the TSB accesses, we patch the
+ * first four instructions in the above sequence.
+ */

 static void patch_one_ktsb_phys(unsigned int *start, unsigned int *end, unsigned long pa)
 {
-	pa >>= KTSB_PHYS_SHIFT;
+	unsigned long high_bits, low_bits;
+
+	high_bits = (pa >> 32) & 0xffffffff;
+	low_bits = (pa >> 0) & 0xffffffff;

 	while (start < end) {
 		unsigned int *ia = (unsigned int *)(unsigned long)*start;

-		ia[0] = (ia[0] & ~0x3fffff) | (pa >> 10);
+		ia[0] = (ia[0] & ~0x3fffff) | (high_bits >> 10);
 		__asm__ __volatile__("flush	%0" : : "r" (ia));

-		ia[1] = (ia[1] & ~0x3ff) | (pa & 0x3ff);
+		ia[1] = (ia[1] & ~0x3fffff) | (low_bits >> 10);
 		__asm__ __volatile__("flush	%0" : : "r" (ia + 1));

+		ia[2] = (ia[2] & ~0x1fff) | (high_bits & 0x3ff);
+		__asm__ __volatile__("flush	%0" : : "r" (ia + 2));
+
+		ia[3] = (ia[3] & ~0x1fff) | (low_bits & 0x3ff);
+		__asm__ __volatile__("flush	%0" : : "r" (ia + 3));
+
 		start++;
 	}
 }
@ -1853,7 +1942,6 @@ static void __init sun4v_linear_pte_xor_finalize(void)
 /* paging_init() sets up the page tables */

 static unsigned long last_valid_pfn;
-pgd_t swapper_pg_dir[PTRS_PER_PGD];

 static void sun4u_pgprot_init(void);
 static void sun4v_pgprot_init(void);
@ -1956,16 +2044,10 @@ void __init paging_init(void)
 	 */
 	init_mm.pgd += ((shift) / (sizeof(pgd_t)));
 	
-	memset(swapper_low_pmd_dir, 0, sizeof(swapper_low_pmd_dir));
+	memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir));

-	/* Now can init the kernel/bad page tables. */
-	pud_set(pud_offset(&swapper_pg_dir[0], 0),
-		swapper_low_pmd_dir + (shift / sizeof(pgd_t)));
-	
 	inherit_prom_mappings();
 	
-	init_kpte_bitmap();
-
 	/* Ok, we can use our TLB miss and window trap handlers safely.  */
 	setup_tba();

@ -2072,70 +2154,6 @@ int page_in_phys_avail(unsigned long paddr)
 	return 0;
 }

-static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata;
-static int pavail_rescan_ents __initdata;
-
-/* Certain OBP calls, such as fetching "available" properties, can
- * claim physical memory.  So, along with initializing the valid
- * address bitmap, what we do here is refetch the physical available
- * memory list again, and make sure it provides at least as much
- * memory as 'pavail' does.
- */
-static void __init setup_valid_addr_bitmap_from_pavail(unsigned long *bitmap)
-{
-	int i;
-
-	read_obp_memory("available", &pavail_rescan[0], &pavail_rescan_ents);
-
-	for (i = 0; i < pavail_ents; i++) {
-		unsigned long old_start, old_end;
-
-		old_start = pavail[i].phys_addr;
-		old_end = old_start + pavail[i].reg_size;
-		while (old_start < old_end) {
-			int n;
-
-			for (n = 0; n < pavail_rescan_ents; n++) {
-				unsigned long new_start, new_end;
-
-				new_start = pavail_rescan[n].phys_addr;
-				new_end = new_start +
-					pavail_rescan[n].reg_size;
-
-				if (new_start <= old_start &&
-				    new_end >= (old_start + PAGE_SIZE)) {
-					set_bit(old_start >> ILOG2_4MB, bitmap);
-					goto do_next_page;
-				}
-			}
-
-			prom_printf("mem_init: Lost memory in pavail\n");
-			prom_printf("mem_init: OLD start[%lx] size[%lx]\n",
-				    pavail[i].phys_addr,
-				    pavail[i].reg_size);
-			prom_printf("mem_init: NEW start[%lx] size[%lx]\n",
-				    pavail_rescan[i].phys_addr,
-				    pavail_rescan[i].reg_size);
-			prom_printf("mem_init: Cannot continue, aborting.\n");
-			prom_halt();
-
-		do_next_page:
-			old_start += PAGE_SIZE;
-		}
-	}
-}
-
-static void __init patch_tlb_miss_handler_bitmap(void)
-{
-	extern unsigned int valid_addr_bitmap_insn[];
-	extern unsigned int valid_addr_bitmap_patch[];
-
-	valid_addr_bitmap_insn[1] = valid_addr_bitmap_patch[1];
-	mb();
-	valid_addr_bitmap_insn[0] = valid_addr_bitmap_patch[0];
-	flushi(&valid_addr_bitmap_insn[0]);
-}
-
 static void __init register_page_bootmem_info(void)
 {
 #ifdef CONFIG_NEED_MULTIPLE_NODES
@ -2148,18 +2166,6 @@ static void __init register_page_bootmem_info(void)
 }
 void __init mem_init(void)
 {
-	unsigned long addr, last;
-
-	addr = PAGE_OFFSET + kern_base;
-	last = PAGE_ALIGN(kern_size) + addr;
-	while (addr < last) {
-		set_bit(__pa(addr) >> ILOG2_4MB, sparc64_valid_addr_bitmap);
-		addr += PAGE_SIZE;
-	}
-
-	setup_valid_addr_bitmap_from_pavail(sparc64_valid_addr_bitmap);
-	patch_tlb_miss_handler_bitmap();
-
 	high_memory = __va(last_valid_pfn << PAGE_SHIFT);

 	register_page_bootmem_info();
@ -2249,18 +2255,9 @@ unsigned long _PAGE_CACHE __read_mostly;
 EXPORT_SYMBOL(_PAGE_CACHE);

 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-unsigned long vmemmap_table[VMEMMAP_SIZE];
-
-static long __meminitdata addr_start, addr_end;
-static int __meminitdata node_start;
-
 int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
 			       int node)
 {
-	unsigned long phys_start = (vstart - VMEMMAP_BASE);
-	unsigned long phys_end = (vend - VMEMMAP_BASE);
-	unsigned long addr = phys_start & VMEMMAP_CHUNK_MASK;
-	unsigned long end = VMEMMAP_ALIGN(phys_end);
 	unsigned long pte_base;

 	pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U |
@ -2271,47 +2268,52 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
 			    _PAGE_CP_4V | _PAGE_CV_4V |
 			    _PAGE_P_4V | _PAGE_W_4V);

-	for (; addr < end; addr += VMEMMAP_CHUNK) {
-		unsigned long *vmem_pp =
-			vmemmap_table + (addr >> VMEMMAP_CHUNK_SHIFT);
-		void *block;
+	pte_base |= _PAGE_PMD_HUGE;
+
+	vstart = vstart & PMD_MASK;
+	vend = ALIGN(vend, PMD_SIZE);
+	for (; vstart < vend; vstart += PMD_SIZE) {
+		pgd_t *pgd = pgd_offset_k(vstart);
+		unsigned long pte;
+		pud_t *pud;
+		pmd_t *pmd;
+
+		if (pgd_none(*pgd)) {
+			pud_t *new = vmemmap_alloc_block(PAGE_SIZE, node);
+
+			if (!new)
+				return -ENOMEM;
+			pgd_populate(&init_mm, pgd, new);
+		}
+
+		pud = pud_offset(pgd, vstart);
+		if (pud_none(*pud)) {
+			pmd_t *new = vmemmap_alloc_block(PAGE_SIZE, node);
+
+			if (!new)
+				return -ENOMEM;
+			pud_populate(&init_mm, pud, new);
+		}
+
+		pmd = pmd_offset(pud, vstart);
+
+		pte = pmd_val(*pmd);
+		if (!(pte & _PAGE_VALID)) {
+			void *block = vmemmap_alloc_block(PMD_SIZE, node);

-		if (!(*vmem_pp & _PAGE_VALID)) {
-			block = vmemmap_alloc_block(1UL << ILOG2_4MB, node);
 			if (!block)
 				return -ENOMEM;

-			*vmem_pp = pte_base | __pa(block);
-
-			/* check to see if we have contiguous blocks */
-			if (addr_end != addr || node_start != node) {
-				if (addr_start)
-					printk(KERN_DEBUG " [%lx-%lx] on node %d\n",
-					       addr_start, addr_end-1, node_start);
-				addr_start = addr;
-				node_start = node;
-			}
-			addr_end = addr + VMEMMAP_CHUNK;
+			pmd_val(*pmd) = pte_base | __pa(block);
 		}
 	}
-	return 0;
-}

-void __meminit vmemmap_populate_print_last(void)
-{
-	if (addr_start) {
-		printk(KERN_DEBUG " [%lx-%lx] on node %d\n",
-		       addr_start, addr_end-1, node_start);
-		addr_start = 0;
-		addr_end = 0;
-		node_start = 0;
-	}
+	return 0;
 }

 void vmemmap_free(unsigned long start, unsigned long end)
 {
 }
-
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */

 static void prot_init_common(unsigned long page_none,
@ -2787,8 +2789,8 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 			do_flush_tlb_kernel_range(start, LOW_OBP_ADDRESS);
 		}
 		if (end > HI_OBP_ADDRESS) {
-			flush_tsb_kernel_range(end, HI_OBP_ADDRESS);
-			do_flush_tlb_kernel_range(end, HI_OBP_ADDRESS);
+			flush_tsb_kernel_range(HI_OBP_ADDRESS, end);
+			do_flush_tlb_kernel_range(HI_OBP_ADDRESS, end);
 		}
 	} else {
 		flush_tsb_kernel_range(start, end);
--- a/arch/sparc/mm/init_64.h
+++ b/arch/sparc/mm/init_64.h
@ -8,15 +8,8 @@
 */

 #define MAX_PHYS_ADDRESS	(1UL << MAX_PHYS_ADDRESS_BITS)
-#define KPTE_BITMAP_CHUNK_SZ		(256UL * 1024UL * 1024UL)
-#define KPTE_BITMAP_BYTES	\
-	((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4)
-#define VALID_ADDR_BITMAP_CHUNK_SZ	(4UL * 1024UL * 1024UL)
-#define VALID_ADDR_BITMAP_BYTES	\
-	((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8)

 extern unsigned long kern_linear_pte_xor[4];
-extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
 extern unsigned int sparc64_highest_unlocked_tlb_ent;
 extern unsigned long sparc64_kern_pri_context;
 extern unsigned long sparc64_kern_pri_nuc_bits;
@ -38,15 +31,4 @@ extern unsigned long kern_locked_tte_data;

 void prom_world(int enter);

-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-#define VMEMMAP_CHUNK_SHIFT	22
-#define VMEMMAP_CHUNK		(1UL << VMEMMAP_CHUNK_SHIFT)
-#define VMEMMAP_CHUNK_MASK	~(VMEMMAP_CHUNK - 1UL)
-#define VMEMMAP_ALIGN(x)	(((x)+VMEMMAP_CHUNK-1UL)&VMEMMAP_CHUNK_MASK)
-
-#define VMEMMAP_SIZE	((((1UL << MAX_PHYSADDR_BITS) >> PAGE_SHIFT) * \
-			  sizeof(struct page)) >> VMEMMAP_CHUNK_SHIFT)
-extern unsigned long vmemmap_table[VMEMMAP_SIZE];
-#endif
-
 #endif /* _SPARC64_MM_INIT_H */
--- a/arch/sparc/power/hibernate_asm.S
+++ b/arch/sparc/power/hibernate_asm.S
@ -54,8 +54,8 @@ ENTRY(swsusp_arch_resume)
 	 nop

 	/* Write PAGE_OFFSET to %g7 */
-	sethi	%uhi(PAGE_OFFSET), %g7
-	sllx	%g7, 32, %g7
+	sethi	%hi(PAGE_OFFSET), %g7
+	ldx	[%g7 + %lo(PAGE_OFFSET)], %g7

 	setuw	(PAGE_SIZE-8), %g3

--- a/arch/sparc/prom/bootstr_64.c
+++ b/arch/sparc/prom/bootstr_64.c
@ -14,7 +14,10 @@
 *          the .bss section or it will break things.
 */

-#define BARG_LEN  256
+/* We limit BARG_LEN to 1024 because this is the size of the
+ * 'barg_out' command line buffer in the SILO bootloader.
+ */
+#define BARG_LEN 1024
 struct {
 	int bootstr_len;
 	int bootstr_valid;
--- a/arch/sparc/prom/cif.S
+++ b/arch/sparc/prom/cif.S
@ -11,11 +11,10 @@
 	.text
 	.globl	prom_cif_direct
 prom_cif_direct:
+	save	%sp, -192, %sp
 	sethi	%hi(p1275buf), %o1
 	or	%o1, %lo(p1275buf), %o1
-	ldx	[%o1 + 0x0010], %o2	! prom_cif_stack
-	save	%o2, -192, %sp
-	ldx	[%i1 + 0x0008], %l2	! prom_cif_handler
+	ldx	[%o1 + 0x0008], %l2	! prom_cif_handler
 	mov	%g4, %l0
 	mov	%g5, %l1
 	mov	%g6, %l3
--- a/arch/sparc/prom/init_64.c
+++ b/arch/sparc/prom/init_64.c
@ -26,13 +26,13 @@ phandle prom_chosen_node;
 * It gets passed the pointer to the PROM vector.
 */

-extern void prom_cif_init(void *, void *);
+extern void prom_cif_init(void *);

-void __init prom_init(void *cif_handler, void *cif_stack)
+void __init prom_init(void *cif_handler)
 {
 	phandle node;

-	prom_cif_init(cif_handler, cif_stack);
+	prom_cif_init(cif_handler);

 	prom_chosen_node = prom_finddevice(prom_chosen_path);
 	if (!prom_chosen_node || (s32)prom_chosen_node == -1)
--- a/Show More
+++ b/Show More