Instruction Set Architectures Part II: x86, RISC, and CISC
Readings: 2.16-2.18
1
Instruction Set Architectures Part II: x86, RISC, and CISC - - PowerPoint PPT Presentation
Instruction Set Architectures Part II: x86, RISC, and CISC Readings: 2.16-2.18 1 Goals for this Class Understand how CPUs run programs How do we express the computation to the CPU? How does the CPU execute it? How does the
Readings: 2.16-2.18
1
2
work?
performance?
running it?
3
holds
needs to use them)
(when it makes function calls).
base of the stack frame.
the frame
callee-saved)
calls.
$fp.
4
main:
$fp,24($sp)
$fp,$sp
$0,8($fp)
$v0,1
$v0,12($fp)
$v0,2
$v0,16($fp)
$3,12($fp)
$v0,16($fp)
$v0,$3,$v0
$v0,8($fp)
$v0,8($fp)
$sp,$fp
$fp,24($sp)
5
main:
$fp,24($sp)
$fp,$sp
$0,8($fp)
$v0,1
$v0,12($fp)
$v0,2
$v0,16($fp)
$3,12($fp)
$v0,16($fp)
$v0,$3,$v0
$v0,8($fp)
$v0,8($fp)
$sp,$fp
$fp,24($sp)
... value fp-relative 0x1020 0x101C +32 0x1018 +24 0x1014 +20 0x1010 +16 0x100C +12 0x1008 +8 0x1004 +4 0x1000 +0 0x0FFC
5
main:
$fp,24($sp)
$fp,$sp
$0,8($fp)
$v0,1
$v0,12($fp)
$v0,2
$v0,16($fp)
$3,12($fp)
$v0,16($fp)
$v0,$3,$v0
$v0,8($fp)
$v0,8($fp)
$sp,$fp
$fp,24($sp)
... value fp-relative 0x1020 0x101C +32 0x1018 +24 0x1014 +20 0x1010 +16 0x100C +12 0x1008 +8 0x1004 +4 0x1000 +0 0x0FFC
sp->
5
main:
$fp,24($sp)
$fp,$sp
$0,8($fp)
$v0,1
$v0,12($fp)
$v0,2
$v0,16($fp)
$3,12($fp)
$v0,16($fp)
$v0,$3,$v0
$v0,8($fp)
$v0,8($fp)
$sp,$fp
$fp,24($sp)
... value fp-relative 0x1020 0x101C +32 0x1018 +24 0x1014 +20 0x1010 +16 0x100C +12 0x1008 +8 0x1004 +4 0x1000 +0 0x0FFC
sp->
PC->
5
main:
$fp,24($sp)
$fp,$sp
$0,8($fp)
$v0,1
$v0,12($fp)
$v0,2
$v0,16($fp)
$3,12($fp)
$v0,16($fp)
$v0,$3,$v0
$v0,8($fp)
$v0,8($fp)
$sp,$fp
$fp,24($sp)
... value fp-relative 0x1020 0x101C +32 0x1018 +24 0x1014 +20 0x1010 +16 0x100C +12 0x1008 +8 0x1004 +4 0x1000 +0 0x0FFC
sp->
PC->
5
main:
$fp,24($sp)
$fp,$sp
$0,8($fp)
$v0,1
$v0,12($fp)
$v0,2
$v0,16($fp)
$3,12($fp)
$v0,16($fp)
$v0,$3,$v0
$v0,8($fp)
$v0,8($fp)
$sp,$fp
$fp,24($sp)
... value fp-relative 0x1020 0x101C +32 0x1018 +24 0x1014 +20 0x1010 +16 0x100C +12 0x1008 +8 0x1004 +4 0x1000 +0 0x0FFC
sp->
PC->
5
main:
$fp,24($sp)
$fp,$sp
$0,8($fp)
$v0,1
$v0,12($fp)
$v0,2
$v0,16($fp)
$3,12($fp)
$v0,16($fp)
$v0,$3,$v0
$v0,8($fp)
$v0,8($fp)
$sp,$fp
$fp,24($sp)
... value fp-relative 0x1020 0x101C +32 0x1018 +24 0x1014 +20 0x1010 +16 0x100C +12 0x1008 +8 0x1004 +4 0x1000 +0 0x0FFC
sp->
PC->
5
main:
$fp,24($sp)
$fp,$sp
$0,8($fp)
$v0,1
$v0,12($fp)
$v0,2
$v0,16($fp)
$3,12($fp)
$v0,16($fp)
$v0,$3,$v0
$v0,8($fp)
$v0,8($fp)
$sp,$fp
$fp,24($sp)
... value fp-relative 0x1020 0x101C +32 0x1018 +24 0x1014 +20 0x1010 +16 0x100C +12 0x1008 +8 0x1004 +4 0x1000 +0 0x0FFC
sp->
PC->
5
main:
$fp,24($sp)
$fp,$sp
$0,8($fp)
$v0,1
$v0,12($fp)
$v0,2
$v0,16($fp)
$3,12($fp)
$v0,16($fp)
$v0,$3,$v0
$v0,8($fp)
$v0,8($fp)
$sp,$fp
$fp,24($sp)
... value fp-relative 0x1020 0x101C +32 0x1018 +24 0x1014 +20 0x1010 +16 0x100C +12 0x1008 +8 0x1004 +4 0x1000 +0 0x0FFC
sp-> fp->
PC->
5
main:
$fp,24($sp)
$fp,$sp
$0,8($fp)
$v0,1
$v0,12($fp)
$v0,2
$v0,16($fp)
$3,12($fp)
$v0,16($fp)
$v0,$3,$v0
$v0,8($fp)
$v0,8($fp)
$sp,$fp
$fp,24($sp)
... value fp-relative 0x1020 0x101C +32 0x1018 +24 0x1014 +20 0x1010 +16 0x100C +12 0x1008 +8 0x1004 +4 0x1000 +0 0x0FFC
sp-> fp->
PC->
The stack frame
5
main:
$fp,24($sp)
$fp,$sp
$0,8($fp)
$v0,1
$v0,12($fp)
$v0,2
$v0,16($fp)
$3,12($fp)
$v0,16($fp)
$v0,$3,$v0
$v0,8($fp)
$v0,8($fp)
$sp,$fp
$fp,24($sp)
... value fp-relative 0x1020 0x101C +32 0x1018 +24 0x1014 +20 0x1010 +16 0x100C +12 0x1008 +8 0x1004 +4 0x1000 +0 0x0FFC
sp-> fp->
PC->
The stack frame
5
main:
$fp,24($sp)
$fp,$sp
$0,8($fp)
$v0,1
$v0,12($fp)
$v0,2
$v0,16($fp)
$3,12($fp)
$v0,16($fp)
$v0,$3,$v0
$v0,8($fp)
$v0,8($fp)
$sp,$fp
$fp,24($sp)
... value fp-relative 0x1020 0x101C +32 0x1018 +24 0x1014 +20 0x1010 +16 0x100C +12 0x1008 +8 0x1004 +4 0x1000 +0 0x0FFC
sp-> fp->
PC->
The stack frame
5
main:
$fp,24($sp)
$fp,$sp
$0,8($fp)
$v0,1
$v0,12($fp)
$v0,2
$v0,16($fp)
$3,12($fp)
$v0,16($fp)
$v0,$3,$v0
$v0,8($fp)
$v0,8($fp)
$sp,$fp
$fp,24($sp)
... value fp-relative 0x1020 0x101C +32 0x1018 +24 0x1014 +20 0x1010 +16 0x100C +12 0x1008 +8 0x1004 +4 0x1000 +0 0x0FFC
sp-> fp->
PC->
The stack frame
5
main:
$fp,24($sp)
$fp,$sp
$0,8($fp)
$v0,1
$v0,12($fp)
$v0,2
$v0,16($fp)
$3,12($fp)
$v0,16($fp)
$v0,$3,$v0
$v0,8($fp)
$v0,8($fp)
$sp,$fp
$fp,24($sp)
... value fp-relative 0x1020 0x101C +32 0x1018 +24 0x1014 +20 0x1010 +16 0x100C +12 0x1008 +8 0x1004 +4 0x1000 +0 0x0FFC
sp-> fp->
PC->
1
The stack frame
5
main:
$fp,24($sp)
$fp,$sp
$0,8($fp)
$v0,1
$v0,12($fp)
$v0,2
$v0,16($fp)
$3,12($fp)
$v0,16($fp)
$v0,$3,$v0
$v0,8($fp)
$v0,8($fp)
$sp,$fp
$fp,24($sp)
... value fp-relative 0x1020 0x101C +32 0x1018 +24 0x1014 +20 0x1010 +16 0x100C +12 0x1008 +8 0x1004 +4 0x1000 +0 0x0FFC
sp-> fp->
PC->
1
The stack frame
5
main:
$fp,24($sp)
$fp,$sp
$0,8($fp)
$v0,1
$v0,12($fp)
$v0,2
$v0,16($fp)
$3,12($fp)
$v0,16($fp)
$v0,$3,$v0
$v0,8($fp)
$v0,8($fp)
$sp,$fp
$fp,24($sp)
... value fp-relative 0x1020 0x101C +32 0x1018 +24 0x1014 +20 0x1010 +16 0x100C +12 0x1008 +8 0x1004 +4 0x1000 +0 0x0FFC
sp-> fp->
PC->
1 2
The stack frame
5
main:
$fp,24($sp)
$fp,$sp
$0,8($fp)
$v0,1
$v0,12($fp)
$v0,2
$v0,16($fp)
$3,12($fp)
$v0,16($fp)
$v0,$3,$v0
$v0,8($fp)
$v0,8($fp)
$sp,$fp
$fp,24($sp)
... value fp-relative 0x1020 0x101C +32 0x1018 +24 0x1014 +20 0x1010 +16 0x100C +12 0x1008 +8 0x1004 +4 0x1000 +0 0x0FFC
sp-> fp->
PC->
1 2
The stack frame
5
main:
$fp,24($sp)
$fp,$sp
$0,8($fp)
$v0,1
$v0,12($fp)
$v0,2
$v0,16($fp)
$3,12($fp)
$v0,16($fp)
$v0,$3,$v0
$v0,8($fp)
$v0,8($fp)
$sp,$fp
$fp,24($sp)
... value fp-relative 0x1020 0x101C +32 0x1018 +24 0x1014 +20 0x1010 +16 0x100C +12 0x1008 +8 0x1004 +4 0x1000 +0 0x0FFC
sp-> fp->
PC->
1 2 X 3
The stack frame
5
main:
$fp,24($sp)
$fp,$sp
$0,8($fp)
$v0,1
$v0,12($fp)
$v0,2
$v0,16($fp)
$3,12($fp)
$v0,16($fp)
$v0,$3,$v0
$v0,8($fp)
$v0,8($fp)
$sp,$fp
$fp,24($sp)
... value fp-relative 0x1020 0x101C +32 0x1018 +24 0x1014 +20 0x1010 +16 0x100C +12 0x1008 +8 0x1004 +4 0x1000 +0 0x0FFC
sp-> fp->
PC->
1 2 X 3
The stack frame
5
main:
$fp,24($sp)
$fp,$sp
$0,8($fp)
$v0,1
$v0,12($fp)
$v0,2
$v0,16($fp)
$3,12($fp)
$v0,16($fp)
$v0,$3,$v0
$v0,8($fp)
$v0,8($fp)
$sp,$fp
$fp,24($sp)
... value fp-relative 0x1020 0x101C +32 0x1018 +24 0x1014 +20 0x1010 +16 0x100C +12 0x1008 +8 0x1004 +4 0x1000 +0 0x0FFC
sp-> fp->
PC->
1 2 X 3
The stack frame
5
main:
$fp,24($sp)
$fp,$sp
$0,8($fp)
$v0,1
$v0,12($fp)
$v0,2
$v0,16($fp)
$3,12($fp)
$v0,16($fp)
$v0,$3,$v0
$v0,8($fp)
$v0,8($fp)
$sp,$fp
$fp,24($sp)
... value fp-relative 0x1020 0x101C +32 0x1018 +24 0x1014 +20 0x1010 +16 0x100C +12 0x1008 +8 0x1004 +4 0x1000 +0 0x0FFC
sp->
PC->
1 2 X 3
The stack frame
5
main:
$fp,24($sp)
$fp,$sp
$0,8($fp)
$v0,1
$v0,12($fp)
$v0,2
$v0,16($fp)
$3,12($fp)
$v0,16($fp)
$v0,$3,$v0
$v0,8($fp)
$v0,8($fp)
$sp,$fp
$fp,24($sp)
... value fp-relative 0x1020 0x101C +32 0x1018 +24 0x1014 +20 0x1010 +16 0x100C +12 0x1008 +8 0x1004 +4 0x1000 +0 0x0FFC
sp->
PC->
1 2 X 3
The stack frame
http://cseweb.ucsd.edu/classes/sp13/cse141-a/asm_examples
6
7
kind of thoroughness.
considerable cost) their CPUs so that this ugliness has relatively little impact on their processors’ performance (more on this later)
8
doing great.”
discussion was poorly structured and it didn't seem like she would be very helpful on the homework assignments.”
specific topic.”
the sessions
9
difficult for the allotted time. At the end of the quiz, you don't know what you got right or wrong right away.”
week), or to allow a dropped quiz or two for the times you really perform
incorrect and/or not supposed to be included.”
and it being on the quiz? (e.g., Thursday quiz only covers up til Tuesday)?
10
nice to be able to have the option to turn it in electronically.”
were about 4 problems asking for the same thing but with different numbers. I think 2 would be enough.”
11
hw but for the hours available, I can't make.”
board, as soon as the power slides come up the class moves too fast, and my brain does not grab all the information.”
13
“AT&T syntax”. This is different than “Intel Syntax”
en.wikipedia.org/wiki/ X86_assembly_language#Syntax)
the AT&T syntax (or at least be aware, if it doesn’t)!
14
15
8-bit 16-bit 32-bit 64-bit Description Notes %AL %AX %EAX %RAX The accumulator register These can be used more or less interchangeably, like the registers in MIPS. %BL %BX %EBX %RBX The base register %CL %CX %ECX %RCX The counter %DL %DX %EDX %RDX The data register %SPL %SP %ESP %RSP Stack pointer %BPL %BP %EBP %RBP Points to the base of the stack frame %RnB %RnW %RnD %Rn (n = 8...15) General purpose registers %SIL %SI %ESI %RSI Source index for string operations %DIL %DI %EDI %RDI Destination index for string operations %IP %EIP %RIP Instruction Pointer %FLAGS Condition codes
Different names (e.g. %AX vs. %EAX vs. %RAX) refer to different parts of the same register
%RAX (64 bits) %EAX (32 bits) %AX %AL
16
Instruction Suffixes b byte 8 bits s short 16 bits w word 16 bits l long 32 bits q quad 64 bits
addb $4, %al addw $4, %ax addl $4, %eax addq %rcx, %rax
17
Type Syntax Meaning Example Register %<reg> R[%reg] %RAX Immediate $nnn constant $42 Label $label label $foobar Displacement n(%reg) Mem[R[%reg] + n]
Base-Offset (%r1, %r2) Mem[R[%r1] + R[%r2]] (%RAX,%AL) Scaled Offset (%r1, %r2, 2^n) Mem[R[%r1] + R[%r2] * 2^n] (%RAX,%AL, 4) Scaled Offset Displacement k(%r1, %r2, 2^n) Mem[R[%r1] + R[%r2] * 2^n + k]
mov.
18
x86 Instruction RTL MIPS Equivalent movb $0x05, %al R[al] = 0x05
movl -4(%ebp), %eax R[eax] = mem[R[ebp] -4] lw $t0, -4($t1) movl %eax, -4(%ebp) mem[R[ebp] -4] = R[eax] sw $t0, -4($t1) movl $LC0, (%esp) mem[R[esp]] = $LC0 la $at, LC0 sw $at, 0($t0) movl %R0, -4(%R1,%R2,4) mem[R[%R1] + R[%R2] * 4 - 4] = R[%R0] sll $at, $t2, 2 add $at, $at, $t1 sw $t0, -4($at) movl %R0, %R1 R[%R1] = R[%R0]
19
Instruction RTL
subl $0x05, %eax R[eax] = R[eax] - 0x05 subl %eax, -4(%ebp) mem[R[ebp] -4] = mem[R[ebp] -4] - R[eax] subl -4(%ebp), %eax R[eax] = R[eax] - mem[R[ebp] -4]
20
Instruction Meaning x86 Equivalent MIPS equivalent pushl %eax Push %eax onto the stack subl $4, %esp; movl %eax, (%esp) subi $sp, $sp, 4 sw $t0, ($sp) popl %eax Pop %eax off the stack movl (%esp), %eax addl $4, %esp lw $t0, ($sp) addi $sp, $sp, 4 enter n Save stack pointer, allocate stack frame with n bytes for locals push %BP mov %SP, %BP sub $n, %SP leave Restore the caller's stack pointer. movl %ebp, %esp pop %ebp
holds
needs to use them)
(when it makes function calls).
base of the stack frame.
%esp
%ebp
21
main:
4(%esp), %ecx
$-16, %esp
%ebp
%esp, %ebp
$16, %esp
$0, -16(%ebp)
$1, -12(%ebp)
$2, -8(%ebp)
%eax, -16(%ebp)
$16, %esp
%ebp
up the flags register
22
Instruction Meaning
cmpl %r1, %r2 Set flags register for %r2 - %r1 (AT&T operand order) jmp <location> Jump to <location> je <location> Jump to <location> if the equal flag is set jg, jge, jl, jle, jnz, ... jump if {>, >=, <, <=, != 0, ...}
23
Instruction Meaning MIPS call <label> Push the return address onto the stack. Jump to the function. Homework? ret Pop the return address off the stack and jump to it. lw $at, 0($sp) addi $sp, $sp, 4 jr $at
(rather than a register as in MIPS)
(with push)
int foo(int x, int y); ... d = foo(a, b); pushq %R9 pushq %R8 call foo movl %eax, d
the homeworks on x86 assembly
homeworks).
the missing bits
AT&T or Intel syntax!
comes first, rather than last.
24
25 http://cseweb.ucsd.edu/classes/sp13/cse141-a/asm_examples/1.html http://cseweb.ucsd.edu/classes/sp13/cse141-a/asm_examples/5.html
26 http://cseweb.ucsd.edu/classes/sp13/cse141-a/asm_examples/7.html
27 http://cseweb.ucsd.edu/classes/sp13/cse141-a/asm_examples/caller.html
28 http://cseweb.ucsd.edu/classes/sp13/cse141-a/asm_examples/callee.html
29 http://cseweb.ucsd.edu/classes/sp13/cse141-a/asm_examples/struct.html
30
instruction per cycle.
31
32
machine have been much higher
32 Opcode rs rt rd shamt funct
One Instruction Word
32 Bits
machine have been much higher
32
A (Very) Long Instruction Word
64 Bits Opcode rs rt rd shamt funct
One Instruction Word
32 Bits
machine have been much higher
32
A (Very) Long Instruction Word
64 Bits
A Really, Very Long Instruction Word
Opcode rs rt rd shamt funct
One Instruction Word
32 Bits
machine have been much higher
32
A (Very) Long Instruction Word
64 Bits Opcode rs rt rd shamt funct
One Instruction Word
32 Bits
machine have been much higher
33
add $s2, $s2, $s3 sub $s4, $s2, $s3 Results: $s2 = 10 $s4 = 6 Since the add and sub execute sequentially, the sub sees the new value for $s2
<ori $s2, $zero,6; ori $s3, $zero, 4> <add $s2, $s2, $s3; sub $s4, $s2, $s3> Results: $s2 = 10 $s4 = 2 Since the add and sub execute at the same time they both see the original value of $s2
34
mainstream success.
VLIW slots.
<ori $s2, $zero,6; ori $s3, $zero, 4> <add $s2, $s2, $s3; nop > <sub $s4, $s2, $s3; nop > Results: $s2 = 10 $s4 = 6 Now, the add and sub execute sequentially, but we’ve wasted space and resources executing nops.
extremely hard.
etc.)
(many companies) or,
instance, by providing special registers and instructions to eliminate branches), or
35
processing
almost 1.0) of the applications, Amdahl’s Law says writing the code by hand is worthwhile.
processor in your cell phone.
36
today’s cool mobile gadgets
time as MIPS
licenses it to other companies.
from many vendors
features (e.g., integrated graphics co- processors)
text book.
37
moment)
them)
38
39
ARM Instruction Meaning LDR r0,[r1,#8] R[r0] = Mem[R[r1] + 8] LDR r0,[r1,#8]! R[r0] = Mem[R[r1] + 8]; R[r1] = R[r1] + 8 LDR r0,[r1],#8 R[r0] = Mem[R[r1]]; R[r1] = R[r1] + 8
40
ARM Instruction Meaning Add r1,r2,r3, LSL #4 R[r1] = R[r2] + (R[r3] << 4) Add r1,r2,r3, LSL r4 R[r1] = R[r2] + (R[r3] << R[r4])
predication for branches
conditional on one of the condition codes
is set, the instruction will execute.
nop.
condition code
branches can slow down execution.
41
if (x == y) p = q + r
ARM Assembly
CMP r0,r1 ADDEQ r2,r3,r4 x is r0 y is r1 p is r2 q is r3 r is r4
MIPS Assembly
x is $s0 y is $s1 p is $s2 q is $s3 r is $s4 bne $s0, $s1, foo add $s2, $s3, $s4 foo:
42
43
44
+4 +8 +12 +16
PC
Push 12(BP) Push 8(BP) Mult Push 0(BP) Push 4(BP) Mult Sub Store 16(BP) Pop
45
Push 12(BP) Push 8(BP) Mult Push 0(BP) Push 4(BP) Mult Sub Store 16(BP) Pop
+4 +8 +12 +16
PC
46
+4 +8 +12 +16
PC
Push 12(BP) Push 8(BP) Mult Push 0(BP) Push 4(BP) Mult Sub Store 16(BP) Pop
47
+4 +8 +12 +16
PC
Push 12(BP) Push 8(BP) Mult Push 0(BP) Push 4(BP) Mult Sub Store 16(BP) Pop
48
+4 +8 +12 +16
PC
Push 12(BP) Push 8(BP) Mult Push 0(BP) Push 4(BP) Mult Sub Store 16(BP) Pop
49
+4 +8 +12 +16
PC
Push 12(BP) Push 8(BP) Mult Push 0(BP) Push 4(BP) Mult Sub Store 16(BP) Pop
50
+4 +8 +12 +16
PC
Push 12(BP) Push 8(BP) Mult Push 0(BP) Push 4(BP) Mult Sub Store 16(BP) Pop
51
+4 +8 +12 +16
PC
Push 12(BP) Push 8(BP) Mult Push 0(BP) Push 4(BP) Mult Sub Store 16(BP) Pop
52
+4 +8 +12 +16
PC
Push 12(BP) Push 8(BP) Mult Push 0(BP) Push 4(BP) Mult Sub Store 16(BP) Pop
53
execution of a builtin function in the CPU. Simple hardware to execute complex instructions (but CPIs are very, very high)
54
instructions
evolve gracefully, etc.
55
CPUs can go fast.
benefits should outweigh this.
easily.
both.
56
“load-store architecture”)
carefully about the hardware when they designed the ISA.
57
58
58
used when, and which addressing modes are valid where.
59
“regular” -- all instructions look more or less the same.
to minimize hardware complexity
for 141L, but it would be harder than MIPS
60
fast processors.
processors inside
(uops), and feed them to a RISC-style processor
61
x86 Code movb $0x05, %al movl -4(%ebp), %eax movl %eax, -4(%ebp) movl %R0, -4(%R1,%R2,4) movl %R0, %R1
lw $t0, -4($t1) sw $t0, -4($t1) sll $at, $t2, 2 add $at, $at, $t1 sw $t0, -4($at)
fast processors.
processors inside
(uops), and feed them to a RISC-style processor
61
x86 Code movb $0x05, %al movl -4(%ebp), %eax movl %eax, -4(%ebp) movl %R0, -4(%R1,%R2,4) movl %R0, %R1
lw $t0, -4($t1) sw $t0, -4($t1) sll $at, $t2, 2 add $at, $at, $t1 sw $t0, -4($at)
PC->
fast processors.
processors inside
(uops), and feed them to a RISC-style processor
61
x86 Code movb $0x05, %al movl -4(%ebp), %eax movl %eax, -4(%ebp) movl %R0, -4(%R1,%R2,4) movl %R0, %R1
lw $t0, -4($t1) sw $t0, -4($t1) sll $at, $t2, 2 add $at, $at, $t1 sw $t0, -4($at)
PC->
fast processors.
processors inside
(uops), and feed them to a RISC-style processor
61
x86 Code movb $0x05, %al movl -4(%ebp), %eax movl %eax, -4(%ebp) movl %R0, -4(%R1,%R2,4) movl %R0, %R1
lw $t0, -4($t1) sw $t0, -4($t1) sll $at, $t2, 2 add $at, $at, $t1 sw $t0, -4($at)
PC->
fast processors.
processors inside
(uops), and feed them to a RISC-style processor
61
x86 Code movb $0x05, %al movl -4(%ebp), %eax movl %eax, -4(%ebp) movl %R0, -4(%R1,%R2,4) movl %R0, %R1
lw $t0, -4($t1) sw $t0, -4($t1) sll $at, $t2, 2 add $at, $at, $t1 sw $t0, -4($at)
PC->
fast processors.
processors inside
(uops), and feed them to a RISC-style processor
61
x86 Code movb $0x05, %al movl -4(%ebp), %eax movl %eax, -4(%ebp) movl %R0, -4(%R1,%R2,4) movl %R0, %R1
lw $t0, -4($t1) sw $t0, -4($t1) sll $at, $t2, 2 add $at, $at, $t1 sw $t0, -4($at)
PC->
fast processors.
processors inside
(uops), and feed them to a RISC-style processor
61
x86 Code movb $0x05, %al movl -4(%ebp), %eax movl %eax, -4(%ebp) movl %R0, -4(%R1,%R2,4) movl %R0, %R1
lw $t0, -4($t1) sw $t0, -4($t1) sll $at, $t2, 2 add $at, $at, $t1 sw $t0, -4($at)
PC->
fast processors.
processors inside
(uops), and feed them to a RISC-style processor
61
x86 Code movb $0x05, %al movl -4(%ebp), %eax movl %eax, -4(%ebp) movl %R0, -4(%R1,%R2,4) movl %R0, %R1
lw $t0, -4($t1) sw $t0, -4($t1) sll $at, $t2, 2 add $at, $at, $t1 sw $t0, -4($at)
PC->
fast processors.
processors inside
(uops), and feed them to a RISC-style processor
61
x86 Code movb $0x05, %al movl -4(%ebp), %eax movl %eax, -4(%ebp) movl %R0, -4(%R1,%R2,4) movl %R0, %R1
lw $t0, -4($t1) sw $t0, -4($t1) sll $at, $t2, 2 add $at, $at, $t1 sw $t0, -4($at)
PC->
fast processors.
processors inside
(uops), and feed them to a RISC-style processor
61
x86 Code movb $0x05, %al movl -4(%ebp), %eax movl %eax, -4(%ebp) movl %R0, -4(%R1,%R2,4) movl %R0, %R1
lw $t0, -4($t1) sw $t0, -4($t1) sll $at, $t2, 2 add $at, $at, $t1 sw $t0, -4($at)
PC->
fast processors.
processors inside
(uops), and feed them to a RISC-style processor
61
x86 Code movb $0x05, %al movl -4(%ebp), %eax movl %eax, -4(%ebp) movl %R0, -4(%R1,%R2,4) movl %R0, %R1
lw $t0, -4($t1) sw $t0, -4($t1) sll $at, $t2, 2 add $at, $at, $t1 sw $t0, -4($at)
PC->
fast processors.
processors inside
(uops), and feed them to a RISC-style processor
61
x86 Code movb $0x05, %al movl -4(%ebp), %eax movl %eax, -4(%ebp) movl %R0, -4(%R1,%R2,4) movl %R0, %R1
lw $t0, -4($t1) sw $t0, -4($t1) sll $at, $t2, 2 add $at, $at, $t1 sw $t0, -4($at)
PC->
The preceding was a dramatization. MIPS instructions were used for clarity and because I had some lying around.
No x86 instructions were harmed in the production of this slide.
“soft” implementation of the x86 instruction set.
VLIW instruction set and execute that instead.
instead.
Transmeta made the case for low-power x86 processors), it started producing very efficient CPUs.
62