Differences
This shows you the differences between two versions of the page.
analytical_modeling_of_system_temperature:index [2009/04/09 04:51] anshulg |
analytical_modeling_of_system_temperature:index [2010/08/17 18:27] (current) |
||
---|---|---|---|
Line 1: | Line 1: | ||
===== Analytical Modeling of System Temperature ===== | ===== Analytical Modeling of System Temperature ===== | ||
==== Results ==== | ==== Results ==== | ||
+ | ===Nops test=== | ||
+ | |||
== nops test == | == nops test == | ||
- | Essentially this is a while loop with a ''nop'' in it | + | This is a while loop with a single ''nop'' inside it.\\ |
+ | {{:analytical_modeling_of_system_temperature:nop.png}} | ||
<code> | <code> | ||
08048394 <main>: | 08048394 <main>: | ||
Line 17: | Line 20: | ||
80483af: 90 nop | 80483af: 90 nop | ||
</code> | </code> | ||
+ | |||
+ | ===Integer Operations=== | ||
== int add test == | == int add test == | ||
- | Essentially this is a while loop with a ''x++'' in it | + | This is a while loop with a single ''x++'' in it.\\ |
+ | {{:analytical_modeling_of_system_temperature:add.png}} | ||
<code> | <code> | ||
08048394 <main>: | 08048394 <main>: | ||
Line 49: | Line 55: | ||
== int sub test == | == int sub test == | ||
- | Essentially this is a while loop with a ''x--'' in it | + | A while loop with a single ''x--'' in it.\\ |
+ | {{:analytical_modeling_of_system_temperature:sub.png}} | ||
<code> | <code> | ||
08048394 <main>: | 08048394 <main>: | ||
Line 79: | Line 86: | ||
== int mul test == | == int mul test == | ||
- | Essentially this is a while loop which takes two integers (''x=54;y=82;'') and multiplies them (''z=x*y''). | + | A while loop which takes two integers (''x=54;y=82;'') and multiplies them (''z=x*y''). \\ |
+ | {{:analytical_modeling_of_system_temperature:mul.png}} | ||
<code> | <code> | ||
08048394 <main>: | 08048394 <main>: | ||
Line 99: | Line 107: | ||
== int div test == | == int div test == | ||
- | Essentially this is a while loop which takes two integers (''x=54;y=82;'') and multiplies them (''z=y/x''). | + | A while loop which takes two integers (''x=54;y=82;'') and divides them (''z=y/x''). \\ |
+ | {{:analytical_modeling_of_system_temperature:div.png}} | ||
<code> | <code> | ||
08048394 <main>: | 08048394 <main>: | ||
Line 127: | Line 136: | ||
80483cf: 90 nop | 80483cf: 90 nop | ||
</code> | </code> | ||
+ | |||
+ | ===FP Operations=== | ||
== fp add test == | == fp add test == | ||
- | This is a while loop which takes two doubles and adds them just like in the int add test. | + | This is a while loop which takes two doubles and adds them just like in the int add test. \\ |
+ | {{:analytical_modeling_of_system_temperature:fpadd.png}} | ||
<code> | <code> | ||
08048394 <main>: | 08048394 <main>: | ||
Line 164: | Line 176: | ||
== fp sub test == | == fp sub test == | ||
- | A while loop which takes two doubles and subtracts them. | + | A while loop which takes two doubles and subtracts them. \\ |
+ | {{:analytical_modeling_of_system_temperature:fpsub.png}} | ||
<code> | <code> | ||
08048394 <main>: | 08048394 <main>: | ||
Line 199: | Line 212: | ||
== fp mul test == | == fp mul test == | ||
- | A while loop which takes two doubles and multiplies them. Within the loop, we take two doubles, so we are multiplying the same numbers again and again. This should avoid overflow. | + | A while loop which takes two doubles and multiplies them. Both operands are reloaded with the same values on every iteration, so we multiply the same two numbers again and again instead of accumulating the product. This should avoid overflow.\\ |
+ | {{:analytical_modeling_of_system_temperature:fpmul.png}} | ||
<code> | <code> | ||
08048394 <main>: | 08048394 <main>: | ||
Line 231: | Line 245: | ||
80483ce: 90 nop | 80483ce: 90 nop | ||
80483cf: 90 nop | 80483cf: 90 nop | ||
+ | </code> | ||
+ | |||
+ | == fp div test == | ||
+ | A while loop which takes two doubles and divides them. The lower peak temperature w.r.t. fp mul test is due to lack of pipelining in the fp div FU.\\ | ||
+ | {{:analytical_modeling_of_system_temperature:fpdiv.png}} | ||
+ | <code> | ||
+ | 08048394 <main>: | ||
+ | 8048394: 8d 4c 24 04 lea 0x4(%esp),%ecx | ||
+ | 8048398: 83 e4 f0 and $0xfffffff0,%esp | ||
+ | 804839b: ff 71 fc pushl -0x4(%ecx) | ||
+ | 804839e: 55 push %ebp | ||
+ | 804839f: 89 e5 mov %esp,%ebp | ||
+ | 80483a1: 51 push %ecx | ||
+ | 80483a2: 83 ec 24 sub $0x24,%esp | ||
+ | 80483a5: dd 05 90 84 04 08 fldl 0x8048490 | ||
+ | 80483ab: dd 5d f0 fstpl -0x10(%ebp) | ||
+ | 80483ae: dd 05 98 84 04 08 fldl 0x8048498 | ||
+ | 80483b4: dd 5d e8 fstpl -0x18(%ebp) | ||
+ | 80483b7: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483ba: dc 75 f0 fdivl -0x10(%ebp) | ||
+ | 80483bd: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483c0: eb e3 jmp 80483a5 <main+0x11> | ||
+ | 80483c2: 90 nop | ||
+ | 80483c3: 90 nop | ||
+ | 80483c4: 90 nop | ||
+ | 80483c5: 90 nop | ||
+ | 80483c6: 90 nop | ||
+ | 80483c7: 90 nop | ||
+ | 80483c8: 90 nop | ||
+ | 80483c9: 90 nop | ||
+ | 80483ca: 90 nop | ||
+ | 80483cb: 90 nop | ||
+ | 80483cc: 90 nop | ||
+ | 80483cd: 90 nop | ||
+ | 80483ce: 90 nop | ||
+ | 80483cf: 90 nop | ||
+ | </code> | ||
+ | |||
+ | ===Unrolled Integer Operations=== | ||
+ | |||
+ | == unrolled int add test == | ||
+ | This is a while loop with multiple ''x++''s in it.\\ | ||
+ | {{:analytical_modeling_of_system_temperature:add2.png}} | ||
+ | <code> | ||
+ | 08048394 <main>: | ||
+ | 8048394: 8d 4c 24 04 lea 0x4(%esp),%ecx | ||
+ | 8048398: 83 e4 f0 and $0xfffffff0,%esp | ||
+ | 804839b: ff 71 fc pushl -0x4(%ecx) | ||
+ | 804839e: 55 push %ebp | ||
+ | 804839f: 89 e5 mov %esp,%ebp | ||
+ | 80483a1: 51 push %ecx | ||
+ | 80483a2: 83 ec 10 sub $0x10,%esp | ||
+ | 80483a5: c7 45 f8 00 00 00 00 movl $0x0,-0x8(%ebp) | ||
+ | 80483ac: 83 45 f8 01 addl $0x1,-0x8(%ebp) | ||
+ | 80483b0: 83 45 f8 01 addl $0x1,-0x8(%ebp) | ||
+ | 80483b4: 83 45 f8 01 addl $0x1,-0x8(%ebp) | ||
+ | 80483b8: 83 45 f8 01 addl $0x1,-0x8(%ebp) | ||
+ | 80483bc: 83 45 f8 01 addl $0x1,-0x8(%ebp) | ||
+ | 80483c0: 83 45 f8 01 addl $0x1,-0x8(%ebp) | ||
+ | 80483c4: 83 45 f8 01 addl $0x1,-0x8(%ebp) | ||
+ | 80483c8: 83 45 f8 01 addl $0x1,-0x8(%ebp) | ||
+ | 80483cc: 83 45 f8 01 addl $0x1,-0x8(%ebp) | ||
+ | 80483d0: 83 45 f8 01 addl $0x1,-0x8(%ebp) | ||
+ | 80483d4: eb cf jmp 80483a5 <main+0x11> | ||
+ | 80483d6: 90 nop | ||
+ | 80483d7: 90 nop | ||
+ | 80483d8: 90 nop | ||
+ | 80483d9: 90 nop | ||
+ | 80483da: 90 nop | ||
+ | 80483db: 90 nop | ||
+ | 80483dc: 90 nop | ||
+ | 80483dd: 90 nop | ||
+ | 80483de: 90 nop | ||
+ | 80483df: 90 nop | ||
+ | </code> | ||
+ | |||
+ | == unrolled int sub test == | ||
+ | A while loop with multiple ''x--''s in it.\\ | ||
+ | {{:analytical_modeling_of_system_temperature:sub2.png}} | ||
+ | <code> | ||
+ | 08048394 <main>: | ||
+ | 8048394: 8d 4c 24 04 lea 0x4(%esp),%ecx | ||
+ | 8048398: 83 e4 f0 and $0xfffffff0,%esp | ||
+ | 804839b: ff 71 fc pushl -0x4(%ecx) | ||
+ | 804839e: 55 push %ebp | ||
+ | 804839f: 89 e5 mov %esp,%ebp | ||
+ | 80483a1: 51 push %ecx | ||
+ | 80483a2: 83 ec 10 sub $0x10,%esp | ||
+ | 80483a5: c7 45 f8 14 00 00 00 movl $0x14,-0x8(%ebp) | ||
+ | 80483ac: 83 6d f8 01 subl $0x1,-0x8(%ebp) | ||
+ | 80483b0: 83 6d f8 01 subl $0x1,-0x8(%ebp) | ||
+ | 80483b4: 83 6d f8 01 subl $0x1,-0x8(%ebp) | ||
+ | 80483b8: 83 6d f8 01 subl $0x1,-0x8(%ebp) | ||
+ | 80483bc: 83 6d f8 01 subl $0x1,-0x8(%ebp) | ||
+ | 80483c0: 83 6d f8 01 subl $0x1,-0x8(%ebp) | ||
+ | 80483c4: 83 6d f8 01 subl $0x1,-0x8(%ebp) | ||
+ | 80483c8: 83 6d f8 01 subl $0x1,-0x8(%ebp) | ||
+ | 80483cc: 83 6d f8 01 subl $0x1,-0x8(%ebp) | ||
+ | 80483d0: 83 6d f8 01 subl $0x1,-0x8(%ebp) | ||
+ | 80483d4: eb cf jmp 80483a5 <main+0x11> | ||
+ | 80483d6: 90 nop | ||
+ | 80483d7: 90 nop | ||
+ | 80483d8: 90 nop | ||
+ | 80483d9: 90 nop | ||
+ | 80483da: 90 nop | ||
+ | 80483db: 90 nop | ||
+ | 80483dc: 90 nop | ||
+ | 80483dd: 90 nop | ||
+ | 80483de: 90 nop | ||
+ | 80483df: 90 nop | ||
+ | </code> | ||
+ | |||
+ | == unrolled int mul test == | ||
+ | A while loop which takes two integers and multiplies them multiple times within the same loop. \\ | ||
+ | {{:analytical_modeling_of_system_temperature:mul2.png}} | ||
+ | <code> | ||
+ | 08048394 <main>: | ||
+ | 8048394: 8d 4c 24 04 lea 0x4(%esp),%ecx | ||
+ | 8048398: 83 e4 f0 and $0xfffffff0,%esp | ||
+ | 804839b: ff 71 fc pushl -0x4(%ecx) | ||
+ | 804839e: 55 push %ebp | ||
+ | 804839f: 89 e5 mov %esp,%ebp | ||
+ | 80483a1: 51 push %ecx | ||
+ | 80483a2: 83 ec 10 sub $0x10,%esp | ||
+ | 80483a5: c7 45 f8 36 00 00 00 movl $0x36,-0x8(%ebp) | ||
+ | 80483ac: c7 45 f4 52 00 00 00 movl $0x52,-0xc(%ebp) | ||
+ | 80483b3: 8b 45 f8 mov -0x8(%ebp),%eax | ||
+ | 80483b6: 0f af 45 f4 imul -0xc(%ebp),%eax | ||
+ | 80483ba: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 80483bd: 8b 45 f8 mov -0x8(%ebp),%eax | ||
+ | 80483c0: 0f af 45 f4 imul -0xc(%ebp),%eax | ||
+ | 80483c4: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 80483c7: 8b 45 f8 mov -0x8(%ebp),%eax | ||
+ | 80483ca: 0f af 45 f4 imul -0xc(%ebp),%eax | ||
+ | 80483ce: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 80483d1: 8b 45 f8 mov -0x8(%ebp),%eax | ||
+ | 80483d4: 0f af 45 f4 imul -0xc(%ebp),%eax | ||
+ | 80483d8: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 80483db: 8b 45 f8 mov -0x8(%ebp),%eax | ||
+ | 80483de: 0f af 45 f4 imul -0xc(%ebp),%eax | ||
+ | 80483e2: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 80483e5: 8b 45 f8 mov -0x8(%ebp),%eax | ||
+ | 80483e8: 0f af 45 f4 imul -0xc(%ebp),%eax | ||
+ | 80483ec: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 80483ef: 8b 45 f8 mov -0x8(%ebp),%eax | ||
+ | 80483f2: 0f af 45 f4 imul -0xc(%ebp),%eax | ||
+ | 80483f6: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 80483f9: 8b 45 f8 mov -0x8(%ebp),%eax | ||
+ | 80483fc: 0f af 45 f4 imul -0xc(%ebp),%eax | ||
+ | 8048400: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 8048403: 8b 45 f8 mov -0x8(%ebp),%eax | ||
+ | 8048406: 0f af 45 f4 imul -0xc(%ebp),%eax | ||
+ | 804840a: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 804840d: 8b 45 f8 mov -0x8(%ebp),%eax | ||
+ | 8048410: 0f af 45 f4 imul -0xc(%ebp),%eax | ||
+ | 8048414: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 8048417: eb 8c jmp 80483a5 <main+0x11> | ||
+ | 8048419: 90 nop | ||
+ | 804841a: 90 nop | ||
+ | 804841b: 90 nop | ||
+ | 804841c: 90 nop | ||
+ | 804841d: 90 nop | ||
+ | 804841e: 90 nop | ||
+ | 804841f: 90 nop | ||
+ | </code> | ||
+ | |||
+ | == unrolled int div test == | ||
+ | A while loop which takes two integers and divides them multiple times in the same loop. \\ | ||
+ | {{:analytical_modeling_of_system_temperature:div2.png}} | ||
+ | <code> | ||
+ | 08048394 <main>: | ||
+ | 8048394: 8d 4c 24 04 lea 0x4(%esp),%ecx | ||
+ | 8048398: 83 e4 f0 and $0xfffffff0,%esp | ||
+ | 804839b: ff 71 fc pushl -0x4(%ecx) | ||
+ | 804839e: 55 push %ebp | ||
+ | 804839f: 89 e5 mov %esp,%ebp | ||
+ | 80483a1: 51 push %ecx | ||
+ | 80483a2: 83 ec 3c sub $0x3c,%esp | ||
+ | 80483a5: c7 45 f8 36 00 00 00 movl $0x36,-0x8(%ebp) | ||
+ | 80483ac: c7 45 f4 52 00 00 00 movl $0x52,-0xc(%ebp) | ||
+ | 80483b3: 8b 45 f4 mov -0xc(%ebp),%eax | ||
+ | 80483b6: 89 45 c0 mov %eax,-0x40(%ebp) | ||
+ | 80483b9: 8b 55 c0 mov -0x40(%ebp),%edx | ||
+ | 80483bc: 89 d0 mov %edx,%eax | ||
+ | 80483be: c1 fa 1f sar $0x1f,%edx | ||
+ | 80483c1: f7 7d f8 idivl -0x8(%ebp) | ||
+ | 80483c4: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 80483c7: 8b 45 f4 mov -0xc(%ebp),%eax | ||
+ | 80483ca: 89 45 c0 mov %eax,-0x40(%ebp) | ||
+ | 80483cd: 8b 55 c0 mov -0x40(%ebp),%edx | ||
+ | 80483d0: 89 d0 mov %edx,%eax | ||
+ | 80483d2: c1 fa 1f sar $0x1f,%edx | ||
+ | 80483d5: f7 7d f8 idivl -0x8(%ebp) | ||
+ | 80483d8: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 80483db: 8b 45 f4 mov -0xc(%ebp),%eax | ||
+ | 80483de: 89 45 c0 mov %eax,-0x40(%ebp) | ||
+ | 80483e1: 8b 55 c0 mov -0x40(%ebp),%edx | ||
+ | 80483e4: 89 d0 mov %edx,%eax | ||
+ | 80483e6: c1 fa 1f sar $0x1f,%edx | ||
+ | 80483e9: f7 7d f8 idivl -0x8(%ebp) | ||
+ | 80483ec: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 80483ef: 8b 45 f4 mov -0xc(%ebp),%eax | ||
+ | 80483f2: 89 45 c0 mov %eax,-0x40(%ebp) | ||
+ | 80483f5: 8b 55 c0 mov -0x40(%ebp),%edx | ||
+ | 80483f8: 89 d0 mov %edx,%eax | ||
+ | 80483fa: c1 fa 1f sar $0x1f,%edx | ||
+ | 80483fd: f7 7d f8 idivl -0x8(%ebp) | ||
+ | 8048400: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 8048403: 8b 45 f4 mov -0xc(%ebp),%eax | ||
+ | 8048406: 89 45 c0 mov %eax,-0x40(%ebp) | ||
+ | 8048409: 8b 55 c0 mov -0x40(%ebp),%edx | ||
+ | 804840c: 89 d0 mov %edx,%eax | ||
+ | 804840e: c1 fa 1f sar $0x1f,%edx | ||
+ | 8048411: f7 7d f8 idivl -0x8(%ebp) | ||
+ | 8048414: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 8048417: 8b 45 f4 mov -0xc(%ebp),%eax | ||
+ | 804841a: 89 45 c0 mov %eax,-0x40(%ebp) | ||
+ | 804841d: 8b 55 c0 mov -0x40(%ebp),%edx | ||
+ | 8048420: 89 d0 mov %edx,%eax | ||
+ | 8048422: c1 fa 1f sar $0x1f,%edx | ||
+ | 8048425: f7 7d f8 idivl -0x8(%ebp) | ||
+ | 8048428: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 804842b: 8b 45 f4 mov -0xc(%ebp),%eax | ||
+ | 804842e: 89 45 c0 mov %eax,-0x40(%ebp) | ||
+ | 8048431: 8b 55 c0 mov -0x40(%ebp),%edx | ||
+ | 8048434: 89 d0 mov %edx,%eax | ||
+ | 8048436: c1 fa 1f sar $0x1f,%edx | ||
+ | 8048439: f7 7d f8 idivl -0x8(%ebp) | ||
+ | 804843c: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 804843f: 8b 45 f4 mov -0xc(%ebp),%eax | ||
+ | 8048442: 89 45 c0 mov %eax,-0x40(%ebp) | ||
+ | 8048445: 8b 55 c0 mov -0x40(%ebp),%edx | ||
+ | 8048448: 89 d0 mov %edx,%eax | ||
+ | 804844a: c1 fa 1f sar $0x1f,%edx | ||
+ | 804844d: f7 7d f8 idivl -0x8(%ebp) | ||
+ | 8048450: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 8048453: 8b 45 f4 mov -0xc(%ebp),%eax | ||
+ | 8048456: 89 45 c0 mov %eax,-0x40(%ebp) | ||
+ | 8048459: 8b 55 c0 mov -0x40(%ebp),%edx | ||
+ | 804845c: 89 d0 mov %edx,%eax | ||
+ | 804845e: c1 fa 1f sar $0x1f,%edx | ||
+ | 8048461: f7 7d f8 idivl -0x8(%ebp) | ||
+ | 8048464: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 8048467: 8b 45 f4 mov -0xc(%ebp),%eax | ||
+ | 804846a: 89 45 c0 mov %eax,-0x40(%ebp) | ||
+ | 804846d: 8b 55 c0 mov -0x40(%ebp),%edx | ||
+ | 8048470: 89 d0 mov %edx,%eax | ||
+ | 8048472: c1 fa 1f sar $0x1f,%edx | ||
+ | 8048475: f7 7d f8 idivl -0x8(%ebp) | ||
+ | 8048478: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 804847b: e9 25 ff ff ff jmp 80483a5 <main+0x11> | ||
+ | </code> | ||
+ | |||
+ | ===Unrolled FP Operations=== | ||
+ | |||
+ | == unrolled fp add test == | ||
+ | This is a while loop which takes two doubles and adds them multiple times in the same loop just like in the unrolled int add test. \\ | ||
+ | {{:analytical_modeling_of_system_temperature:fpadd2.png}} | ||
+ | <code> | ||
+ | 08048394 <main>: | ||
+ | 8048394: 8d 4c 24 04 lea 0x4(%esp),%ecx | ||
+ | 8048398: 83 e4 f0 and $0xfffffff0,%esp | ||
+ | 804839b: ff 71 fc pushl -0x4(%ecx) | ||
+ | 804839e: 55 push %ebp | ||
+ | 804839f: 89 e5 mov %esp,%ebp | ||
+ | 80483a1: 51 push %ecx | ||
+ | 80483a2: 83 ec 24 sub $0x24,%esp | ||
+ | 80483a5: dd 05 e0 84 04 08 fldl 0x80484e0 | ||
+ | 80483ab: dd 5d f0 fstpl -0x10(%ebp) | ||
+ | 80483ae: dd 05 e8 84 04 08 fldl 0x80484e8 | ||
+ | 80483b4: dd 5d e8 fstpl -0x18(%ebp) | ||
+ | 80483b7: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483ba: dc 45 f0 faddl -0x10(%ebp) | ||
+ | 80483bd: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483c0: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483c3: dc 45 f0 faddl -0x10(%ebp) | ||
+ | 80483c6: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483c9: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483cc: dc 45 f0 faddl -0x10(%ebp) | ||
+ | 80483cf: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483d2: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483d5: dc 45 f0 faddl -0x10(%ebp) | ||
+ | 80483d8: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483db: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483de: dc 45 f0 faddl -0x10(%ebp) | ||
+ | 80483e1: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483e4: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483e7: dc 45 f0 faddl -0x10(%ebp) | ||
+ | 80483ea: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483ed: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483f0: dc 45 f0 faddl -0x10(%ebp) | ||
+ | 80483f3: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483f6: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483f9: dc 45 f0 faddl -0x10(%ebp) | ||
+ | 80483fc: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483ff: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 8048402: dc 45 f0 faddl -0x10(%ebp) | ||
+ | 8048405: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 8048408: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 804840b: dc 45 f0 faddl -0x10(%ebp) | ||
+ | 804840e: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 8048411: eb 92 jmp 80483a5 <main+0x11> | ||
+ | 8048413: 90 nop | ||
+ | 8048414: 90 nop | ||
+ | 8048415: 90 nop | ||
+ | 8048416: 90 nop | ||
+ | 8048417: 90 nop | ||
+ | 8048418: 90 nop | ||
+ | 8048419: 90 nop | ||
+ | 804841a: 90 nop | ||
+ | 804841b: 90 nop | ||
+ | 804841c: 90 nop | ||
+ | 804841d: 90 nop | ||
+ | 804841e: 90 nop | ||
+ | 804841f: 90 nop | ||
+ | </code> | ||
+ | |||
+ | == unrolled fp sub test == | ||
+ | A while loop which takes two doubles and subtracts them multiple times in the same loop. \\ | ||
+ | {{:analytical_modeling_of_system_temperature:fpsub2.png}} | ||
+ | <code> | ||
+ | 08048394 <main>: | ||
+ | 8048394: 8d 4c 24 04 lea 0x4(%esp),%ecx | ||
+ | 8048398: 83 e4 f0 and $0xfffffff0,%esp | ||
+ | 804839b: ff 71 fc pushl -0x4(%ecx) | ||
+ | 804839e: 55 push %ebp | ||
+ | 804839f: 89 e5 mov %esp,%ebp | ||
+ | 80483a1: 51 push %ecx | ||
+ | 80483a2: 83 ec 24 sub $0x24,%esp | ||
+ | 80483a5: dd 05 e0 84 04 08 fldl 0x80484e0 | ||
+ | 80483ab: dd 5d f0 fstpl -0x10(%ebp) | ||
+ | 80483ae: dd 05 e8 84 04 08 fldl 0x80484e8 | ||
+ | 80483b4: dd 5d e8 fstpl -0x18(%ebp) | ||
+ | 80483b7: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483ba: dc 65 f0 fsubl -0x10(%ebp) | ||
+ | 80483bd: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483c0: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483c3: dc 65 f0 fsubl -0x10(%ebp) | ||
+ | 80483c6: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483c9: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483cc: dc 65 f0 fsubl -0x10(%ebp) | ||
+ | 80483cf: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483d2: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483d5: dc 65 f0 fsubl -0x10(%ebp) | ||
+ | 80483d8: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483db: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483de: dc 65 f0 fsubl -0x10(%ebp) | ||
+ | 80483e1: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483e4: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483e7: dc 65 f0 fsubl -0x10(%ebp) | ||
+ | 80483ea: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483ed: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483f0: dc 65 f0 fsubl -0x10(%ebp) | ||
+ | 80483f3: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483f6: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483f9: dc 65 f0 fsubl -0x10(%ebp) | ||
+ | 80483fc: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483ff: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 8048402: dc 65 f0 fsubl -0x10(%ebp) | ||
+ | 8048405: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 8048408: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 804840b: dc 65 f0 fsubl -0x10(%ebp) | ||
+ | 804840e: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 8048411: eb 92 jmp 80483a5 <main+0x11> | ||
+ | 8048413: 90 nop | ||
+ | 8048414: 90 nop | ||
+ | 8048415: 90 nop | ||
+ | 8048416: 90 nop | ||
+ | 8048417: 90 nop | ||
+ | 8048418: 90 nop | ||
+ | 8048419: 90 nop | ||
+ | 804841a: 90 nop | ||
+ | 804841b: 90 nop | ||
+ | 804841c: 90 nop | ||
+ | 804841d: 90 nop | ||
+ | 804841e: 90 nop | ||
+ | 804841f: 90 nop | ||
+ | </code> | ||
+ | |||
+ | == unrolled fp mul test == | ||
+ | A while loop which takes two doubles and multiplies them multiple times in the same loop. As in the case of fp mul test, we avoid overflow.\\ | ||
+ | {{:analytical_modeling_of_system_temperature:fpmul2.png}} | ||
+ | <code> | ||
+ | 08048394 <main>: | ||
+ | 8048394: 8d 4c 24 04 lea 0x4(%esp),%ecx | ||
+ | 8048398: 83 e4 f0 and $0xfffffff0,%esp | ||
+ | 804839b: ff 71 fc pushl -0x4(%ecx) | ||
+ | 804839e: 55 push %ebp | ||
+ | 804839f: 89 e5 mov %esp,%ebp | ||
+ | 80483a1: 51 push %ecx | ||
+ | 80483a2: 83 ec 24 sub $0x24,%esp | ||
+ | 80483a5: dd 05 e0 84 04 08 fldl 0x80484e0 | ||
+ | 80483ab: dd 5d f0 fstpl -0x10(%ebp) | ||
+ | 80483ae: dd 05 e8 84 04 08 fldl 0x80484e8 | ||
+ | 80483b4: dd 5d e8 fstpl -0x18(%ebp) | ||
+ | 80483b7: dd 45 f0 fldl -0x10(%ebp) | ||
+ | 80483ba: dc 4d e8 fmull -0x18(%ebp) | ||
+ | 80483bd: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483c0: dd 45 f0 fldl -0x10(%ebp) | ||
+ | 80483c3: dc 4d e8 fmull -0x18(%ebp) | ||
+ | 80483c6: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483c9: dd 45 f0 fldl -0x10(%ebp) | ||
+ | 80483cc: dc 4d e8 fmull -0x18(%ebp) | ||
+ | 80483cf: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483d2: dd 45 f0 fldl -0x10(%ebp) | ||
+ | 80483d5: dc 4d e8 fmull -0x18(%ebp) | ||
+ | 80483d8: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483db: dd 45 f0 fldl -0x10(%ebp) | ||
+ | 80483de: dc 4d e8 fmull -0x18(%ebp) | ||
+ | 80483e1: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483e4: dd 45 f0 fldl -0x10(%ebp) | ||
+ | 80483e7: dc 4d e8 fmull -0x18(%ebp) | ||
+ | 80483ea: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483ed: dd 45 f0 fldl -0x10(%ebp) | ||
+ | 80483f0: dc 4d e8 fmull -0x18(%ebp) | ||
+ | 80483f3: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483f6: dd 45 f0 fldl -0x10(%ebp) | ||
+ | 80483f9: dc 4d e8 fmull -0x18(%ebp) | ||
+ | 80483fc: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483ff: dd 45 f0 fldl -0x10(%ebp) | ||
+ | 8048402: dc 4d e8 fmull -0x18(%ebp) | ||
+ | 8048405: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 8048408: dd 45 f0 fldl -0x10(%ebp) | ||
+ | 804840b: dc 4d e8 fmull -0x18(%ebp) | ||
+ | 804840e: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 8048411: eb 92 jmp 80483a5 <main+0x11> | ||
+ | 8048413: 90 nop | ||
+ | 8048414: 90 nop | ||
+ | 8048415: 90 nop | ||
+ | 8048416: 90 nop | ||
+ | 8048417: 90 nop | ||
+ | 8048418: 90 nop | ||
+ | 8048419: 90 nop | ||
+ | 804841a: 90 nop | ||
+ | 804841b: 90 nop | ||
+ | 804841c: 90 nop | ||
+ | 804841d: 90 nop | ||
+ | 804841e: 90 nop | ||
+ | 804841f: 90 nop | ||
+ | </code> | ||
+ | |||
+ | == unrolled fp div test == | ||
+ | A while loop which takes two doubles and divides them multiple times in the same loop.\\ | ||
+ | {{:analytical_modeling_of_system_temperature:fpdiv2.png}} | ||
+ | <code> | ||
+ | 08048394 <main>: | ||
+ | 8048394: 8d 4c 24 04 lea 0x4(%esp),%ecx | ||
+ | 8048398: 83 e4 f0 and $0xfffffff0,%esp | ||
+ | 804839b: ff 71 fc pushl -0x4(%ecx) | ||
+ | 804839e: 55 push %ebp | ||
+ | 804839f: 89 e5 mov %esp,%ebp | ||
+ | 80483a1: 51 push %ecx | ||
+ | 80483a2: 83 ec 24 sub $0x24,%esp | ||
+ | 80483a5: dd 05 e0 84 04 08 fldl 0x80484e0 | ||
+ | 80483ab: dd 5d f0 fstpl -0x10(%ebp) | ||
+ | 80483ae: dd 05 e8 84 04 08 fldl 0x80484e8 | ||
+ | 80483b4: dd 5d e8 fstpl -0x18(%ebp) | ||
+ | 80483b7: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483ba: dc 75 f0 fdivl -0x10(%ebp) | ||
+ | 80483bd: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483c0: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483c3: dc 75 f0 fdivl -0x10(%ebp) | ||
+ | 80483c6: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483c9: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483cc: dc 75 f0 fdivl -0x10(%ebp) | ||
+ | 80483cf: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483d2: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483d5: dc 75 f0 fdivl -0x10(%ebp) | ||
+ | 80483d8: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483db: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483de: dc 75 f0 fdivl -0x10(%ebp) | ||
+ | 80483e1: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483e4: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483e7: dc 75 f0 fdivl -0x10(%ebp) | ||
+ | 80483ea: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483ed: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483f0: dc 75 f0 fdivl -0x10(%ebp) | ||
+ | 80483f3: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483f6: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 80483f9: dc 75 f0 fdivl -0x10(%ebp) | ||
+ | 80483fc: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 80483ff: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 8048402: dc 75 f0 fdivl -0x10(%ebp) | ||
+ | 8048405: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 8048408: dd 45 e8 fldl -0x18(%ebp) | ||
+ | 804840b: dc 75 f0 fdivl -0x10(%ebp) | ||
+ | 804840e: dd 5d e0 fstpl -0x20(%ebp) | ||
+ | 8048411: eb 92 jmp 80483a5 <main+0x11> | ||
+ | 8048413: 90 nop | ||
+ | 8048414: 90 nop | ||
+ | 8048415: 90 nop | ||
+ | 8048416: 90 nop | ||
+ | 8048417: 90 nop | ||
+ | 8048418: 90 nop | ||
+ | 8048419: 90 nop | ||
+ | 804841a: 90 nop | ||
+ | 804841b: 90 nop | ||
+ | 804841c: 90 nop | ||
+ | 804841d: 90 nop | ||
+ | 804841e: 90 nop | ||
+ | 804841f: 90 nop | ||
+ | </code> | ||
+ | |||
+ | ===Linked-list Cache Miss Test=== | ||
+ | For a linked-list test, every cache miss depends on the previous cache miss. Thus, there is limited runahead. | ||
+ | == cache miss with nops == | ||
+ | We create a large linked list and traverse it in a while loop by pointer chasing. While the miss is being serviced, the machine executes nops (added in the assembly code before the end of each loop iteration). We increase the number of nops in the loop to decrease the cache miss rate (more nops per iteration means more time between requests to memory for data). Thus, the lower the cache miss rate, the higher the number of nops executed per iteration and, consequently, the lower the peak temperature.\\ | ||
+ | {{:analytical_modeling_of_system_temperature:miss_nops_new.png?1000}} | ||
+ | <code> | ||
+ | 080483f4 <main>: | ||
+ | 80483f4: 8d 4c 24 04 lea 0x4(%esp),%ecx | ||
+ | 80483f8: 83 e4 f0 and $0xfffffff0,%esp | ||
+ | 80483fb: ff 71 fc pushl -0x4(%ecx) | ||
+ | 80483fe: 55 push %ebp | ||
+ | 80483ff: 89 e5 mov %esp,%ebp | ||
+ | 8048401: 51 push %ecx | ||
+ | 8048402: 83 ec 44 sub $0x44,%esp | ||
+ | 8048405: c7 44 24 04 04 00 00 movl $0x4,0x4(%esp) | ||
+ | 804840c: 00 | ||
+ | 804840d: c7 04 24 55 90 04 08 movl $0x8049055,(%esp) | ||
+ | 8048414: e8 13 ff ff ff call 804832c <printf@plt> | ||
+ | 8048419: c7 44 24 04 40 00 00 movl $0x40,0x4(%esp) | ||
+ | 8048420: 00 | ||
+ | 8048421: c7 04 24 01 00 00 00 movl $0x1,(%esp) | ||
+ | 8048428: e8 df fe ff ff call 804830c <calloc@plt> | ||
+ | 804842d: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 8048430: 8b 45 f0 mov -0x10(%ebp),%eax | ||
+ | 8048433: 89 45 ec mov %eax,-0x14(%ebp) | ||
+ | 8048436: c7 45 f4 00 00 00 00 movl $0x0,-0xc(%ebp) | ||
+ | 804843d: eb 34 jmp 8048473 <main+0x7f> | ||
+ | 804843f: c7 44 24 04 40 00 00 movl $0x40,0x4(%esp) | ||
+ | 8048446: 00 | ||
+ | 8048447: c7 04 24 01 00 00 00 movl $0x1,(%esp) | ||
+ | 804844e: e8 b9 fe ff ff call 804830c <calloc@plt> | ||
+ | 8048453: 89 45 e8 mov %eax,-0x18(%ebp) | ||
+ | 8048456: 8b 45 e8 mov -0x18(%ebp),%eax | ||
+ | 8048459: c7 40 3c 00 00 00 00 movl $0x0,0x3c(%eax) | ||
+ | 8048460: 8b 55 ec mov -0x14(%ebp),%edx | ||
+ | 8048463: 8b 45 e8 mov -0x18(%ebp),%eax | ||
+ | 8048466: 89 42 3c mov %eax,0x3c(%edx) | ||
+ | 8048469: 8b 45 e8 mov -0x18(%ebp),%eax | ||
+ | 804846c: 89 45 ec mov %eax,-0x14(%ebp) | ||
+ | 804846f: 83 45 f4 01 addl $0x1,-0xc(%ebp) | ||
+ | 8048473: 81 7d f4 7f 96 98 00 cmpl $0x98967f,-0xc(%ebp) | ||
+ | 804847a: 7e c3 jle 804843f <main+0x4b> | ||
+ | 804847c: 8b 45 ec mov -0x14(%ebp),%eax | ||
+ | 804847f: 8b 55 f0 mov -0x10(%ebp),%edx | ||
+ | 8048482: 89 50 3c mov %edx,0x3c(%eax) | ||
+ | 8048485: 8b 45 f0 mov -0x10(%ebp),%eax | ||
+ | 8048488: 89 45 ec mov %eax,-0x14(%ebp) | ||
+ | 804848b: e9 e7 0a 00 00 jmp 8048f77 <main+0xb83> | ||
+ | 8048490: 8b 45 ec mov -0x14(%ebp),%eax | ||
+ | 8048493: 8b 40 3c mov 0x3c(%eax),%eax | ||
+ | 8048496: 89 45 ec mov %eax,-0x14(%ebp) | ||
+ | 8048499: 90 nop | ||
+ | 804849a: 90 nop | ||
+ | 804849b: 90 nop | ||
+ | |||
+ | //lots and lots of nops to vary cache miss rate | ||
+ | |||
+ | 8048f76: 90 nop | ||
+ | 8048f77: 8b 45 ec mov -0x14(%ebp),%eax | ||
+ | 8048f7a: 8b 40 3c mov 0x3c(%eax),%eax | ||
+ | 8048f7d: 85 c0 test %eax,%eax | ||
+ | 8048f7f: 0f 85 0b f5 ff ff jne 8048490 <main+0x9c> | ||
+ | 8048f85: 83 c4 44 add $0x44,%esp | ||
+ | 8048f88: 59 pop %ecx | ||
+ | 8048f89: 5d pop %ebp | ||
+ | 8048f8a: 8d 61 fc lea -0x4(%ecx),%esp | ||
+ | 8048f8d: c3 ret | ||
+ | 8048f8e: 90 nop | ||
+ | 8048f8f: 90 nop | ||
+ | </code> | ||
+ | |||
+ | == cache miss with ops == | ||
+ | Same as above except we now have some fp operations in each loop instead of nops. Thus, while a miss is being serviced, the system has work to do. | ||
+ | We increase the amount of work done in each loop iteration to decrease the cache miss rate. Thus, the lower the cache miss rate, the more work is done per iteration and the higher the peak temperature. Note that this is also the expected behavior with cache misses: the higher the cache miss rate, the more time is spent servicing misses, so the CPU is stalled for longer and the peak temperature is lower.\\ | ||
+ | {{:analytical_modeling_of_system_temperature:miss_ops_new.png?1000}} | ||
+ | <code> | ||
+ | 08048424 <main>: | ||
+ | 8048424: 8d 4c 24 04 lea 0x4(%esp),%ecx | ||
+ | 8048428: 83 e4 f0 and $0xfffffff0,%esp | ||
+ | 804842b: ff 71 fc pushl -0x4(%ecx) | ||
+ | 804842e: 55 push %ebp | ||
+ | 804842f: 89 e5 mov %esp,%ebp | ||
+ | 8048431: 53 push %ebx | ||
+ | 8048432: 51 push %ecx | ||
+ | 8048433: 83 ec 40 sub $0x40,%esp | ||
+ | 8048436: 89 cb mov %ecx,%ebx | ||
+ | 8048438: c7 44 24 04 04 00 00 movl $0x4,0x4(%esp) | ||
+ | 804843f: 00 | ||
+ | 8048440: c7 04 24 e5 85 04 08 movl $0x80485e5,(%esp) | ||
+ | 8048447: e8 04 ff ff ff call 8048350 <printf@plt> | ||
+ | 804844c: c7 44 24 04 40 00 00 movl $0x40,0x4(%esp) | ||
+ | 8048453: 00 | ||
+ | 8048454: c7 04 24 01 00 00 00 movl $0x1,(%esp) | ||
+ | 804845b: e8 d0 fe ff ff call 8048330 <calloc@plt> | ||
+ | 8048460: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 8048463: 8b 45 f0 mov -0x10(%ebp),%eax | ||
+ | 8048466: 89 45 ec mov %eax,-0x14(%ebp) | ||
+ | 8048469: 8b 43 04 mov 0x4(%ebx),%eax | ||
+ | 804846c: 83 c0 04 add $0x4,%eax | ||
+ | 804846f: 8b 00 mov (%eax),%eax | ||
+ | 8048471: 89 04 24 mov %eax,(%esp) | ||
+ | 8048474: e8 e7 fe ff ff call 8048360 <atoi@plt> | ||
+ | 8048479: 89 45 e4 mov %eax,-0x1c(%ebp) | ||
+ | 804847c: c7 45 f4 00 00 00 00 movl $0x0,-0xc(%ebp) | ||
+ | 8048483: eb 34 jmp 80484b9 <main+0x95> | ||
+ | 8048485: c7 44 24 04 40 00 00 movl $0x40,0x4(%esp) | ||
+ | 804848c: 00 | ||
+ | 804848d: c7 04 24 01 00 00 00 movl $0x1,(%esp) | ||
+ | 8048494: e8 97 fe ff ff call 8048330 <calloc@plt> | ||
+ | 8048499: 89 45 e8 mov %eax,-0x18(%ebp) | ||
+ | 804849c: 8b 45 e8 mov -0x18(%ebp),%eax | ||
+ | 804849f: c7 40 3c 00 00 00 00 movl $0x0,0x3c(%eax) | ||
+ | 80484a6: 8b 55 ec mov -0x14(%ebp),%edx | ||
+ | 80484a9: 8b 45 e8 mov -0x18(%ebp),%eax | ||
+ | 80484ac: 89 42 3c mov %eax,0x3c(%edx) | ||
+ | 80484af: 8b 45 e8 mov -0x18(%ebp),%eax | ||
+ | 80484b2: 89 45 ec mov %eax,-0x14(%ebp) | ||
+ | 80484b5: 83 45 f4 01 addl $0x1,-0xc(%ebp) | ||
+ | 80484b9: 81 7d f4 7f 96 98 00 cmpl $0x98967f,-0xc(%ebp) | ||
+ | 80484c0: 7e c3 jle 8048485 <main+0x61> | ||
+ | 80484c2: 8b 45 ec mov -0x14(%ebp),%eax | ||
+ | 80484c5: 8b 55 f0 mov -0x10(%ebp),%edx | ||
+ | 80484c8: 89 50 3c mov %edx,0x3c(%eax) | ||
+ | 80484cb: 8b 45 f0 mov -0x10(%ebp),%eax | ||
+ | 80484ce: 89 45 ec mov %eax,-0x14(%ebp) | ||
+ | 80484d1: eb 39 jmp 804850c <main+0xe8> | ||
+ | 80484d3: 8b 45 ec mov -0x14(%ebp),%eax | ||
+ | 80484d6: 8b 40 3c mov 0x3c(%eax),%eax | ||
+ | 80484d9: 89 45 ec mov %eax,-0x14(%ebp) | ||
+ | 80484dc: c7 45 f4 00 00 00 00 movl $0x0,-0xc(%ebp) | ||
+ | 80484e3: eb 1f jmp 8048504 <main+0xe0> | ||
+ | 80484e5: dd 05 f0 85 04 08 fldl 0x80485f0 // work to do | ||
+ | 80484eb: dd 5d d8 fstpl -0x28(%ebp) | ||
+ | 80484ee: dd 05 f8 85 04 08 fldl 0x80485f8 | ||
+ | 80484f4: dd 5d d0 fstpl -0x30(%ebp) | ||
+ | 80484f7: dd 45 d8 fldl -0x28(%ebp) | ||
+ | 80484fa: dc 4d d0 fmull -0x30(%ebp) | ||
+ | 80484fd: dd 5d c8 fstpl -0x38(%ebp) | ||
+ | 8048500: 83 45 f4 01 addl $0x1,-0xc(%ebp) | ||
+ | 8048504: 8b 45 f4 mov -0xc(%ebp),%eax | ||
+ | 8048507: 3b 45 e4 cmp -0x1c(%ebp),%eax | ||
+ | 804850a: 7c d9 jl 80484e5 <main+0xc1> // jump back to do more work within the same loop | ||
+ | 804850c: 8b 45 ec mov -0x14(%ebp),%eax | ||
+ | 804850f: 8b 40 3c mov 0x3c(%eax),%eax | ||
+ | 8048512: 85 c0 test %eax,%eax | ||
+ | 8048514: 75 bd jne 80484d3 <main+0xaf> // end of one loop | ||
+ | 8048516: 83 c4 40 add $0x40,%esp | ||
+ | 8048519: 59 pop %ecx | ||
+ | 804851a: 5b pop %ebx | ||
+ | 804851b: 5d pop %ebp | ||
+ | 804851c: 8d 61 fc lea -0x4(%ecx),%esp | ||
+ | 804851f: c3 ret | ||
+ | </code> | ||
+ | |||
+ | ===Linked-list Branch Misprediction Test=== | ||
+ | We wrap a simple branch condition around the linked-list test. Note, however, that even when a branch is mispredicted and the wrong path triggers a cache miss, the data fetched may still be useful later on. Thus, a mispredicted branch is not totally useless.\\ | ||
+ | |||
+ | We compare the generated random number against either RAND_MAX-1 or RAND_MAX/2. The two thresholds produce different branch misprediction rates.\\ | ||
+ | |||
+ | Results indicate that the higher the branch misprediction rate, the higher the CPU peak temperature. We believe this is due to the work done in flushing the pipeline. | ||
+ | {{:analytical_modeling_of_system_temperature:mispred.png?1000}} | ||
+ | <code> | ||
+ | 08048424 <main>: | ||
+ | 8048424: 8d 4c 24 04 lea 0x4(%esp),%ecx | ||
+ | 8048428: 83 e4 f0 and $0xfffffff0,%esp | ||
+ | 804842b: ff 71 fc pushl -0x4(%ecx) | ||
+ | 804842e: 55 push %ebp | ||
+ | 804842f: 89 e5 mov %esp,%ebp | ||
+ | 8048431: 51 push %ecx | ||
+ | 8048432: 83 ec 44 sub $0x44,%esp | ||
+ | 8048435: c7 44 24 04 04 00 00 movl $0x4,0x4(%esp) | ||
+ | 804843c: 00 | ||
+ | 804843d: c7 04 24 a5 85 04 08 movl $0x80485a5,(%esp) | ||
+ | 8048444: e8 07 ff ff ff call 8048350 <printf@plt> | ||
+ | 8048449: c7 44 24 04 40 00 00 movl $0x40,0x4(%esp) | ||
+ | 8048450: 00 | ||
+ | 8048451: c7 04 24 01 00 00 00 movl $0x1,(%esp) | ||
+ | 8048458: e8 d3 fe ff ff call 8048330 <calloc@plt> | ||
+ | 804845d: 89 45 f0 mov %eax,-0x10(%ebp) | ||
+ | 8048460: 8b 45 f0 mov -0x10(%ebp),%eax | ||
+ | 8048463: 89 45 ec mov %eax,-0x14(%ebp) | ||
+ | 8048466: c7 45 f4 00 00 00 00 movl $0x0,-0xc(%ebp) | ||
+ | 804846d: eb 34 jmp 80484a3 <main+0x7f> | ||
+ | 804846f: c7 44 24 04 40 00 00 movl $0x40,0x4(%esp) | ||
+ | 8048476: 00 | ||
+ | 8048477: c7 04 24 01 00 00 00 movl $0x1,(%esp) | ||
+ | 804847e: e8 ad fe ff ff call 8048330 <calloc@plt> | ||
+ | 8048483: 89 45 e8 mov %eax,-0x18(%ebp) | ||
+ | 8048486: 8b 45 e8 mov -0x18(%ebp),%eax | ||
+ | 8048489: c7 40 3c 00 00 00 00 movl $0x0,0x3c(%eax) | ||
+ | 8048490: 8b 55 ec mov -0x14(%ebp),%edx | ||
+ | 8048493: 8b 45 e8 mov -0x18(%ebp),%eax | ||
+ | 8048496: 89 42 3c mov %eax,0x3c(%edx) | ||
+ | 8048499: 8b 45 e8 mov -0x18(%ebp),%eax | ||
+ | 804849c: 89 45 ec mov %eax,-0x14(%ebp) | ||
+ | 804849f: 83 45 f4 01 addl $0x1,-0xc(%ebp) | ||
+ | 80484a3: 81 7d f4 7f 96 98 00 cmpl $0x98967f,-0xc(%ebp) | ||
+ | 80484aa: 7e c3 jle 804846f <main+0x4b> | ||
+ | 80484ac: 8b 45 ec mov -0x14(%ebp),%eax | ||
+ | 80484af: 8b 55 f0 mov -0x10(%ebp),%edx | ||
+ | 80484b2: 89 50 3c mov %edx,0x3c(%eax) | ||
+ | 80484b5: 8b 45 f0 mov -0x10(%ebp),%eax | ||
+ | 80484b8: 89 45 ec mov %eax,-0x14(%ebp) | ||
+ | 80484bb: e8 a0 fe ff ff call 8048360 <rand@plt> | ||
+ | 80484c0: 89 45 f4 mov %eax,-0xc(%ebp) | ||
+ | 80484c3: 81 7d f4 fe ff ff 3f cmpl $0x3ffffffe,-0xc(%ebp) // This is RAND_MAX/2 used for the red plot. | ||
+ | 80484ca: 7f ef jg 80484bb <main+0x97> // We use RAND_MAX-1 for the blue plot. | ||
+ | 80484cc: 8b 45 ec mov -0x14(%ebp),%eax | ||
+ | 80484cf: 8b 40 3c mov 0x3c(%eax),%eax | ||
+ | 80484d2: 89 45 ec mov %eax,-0x14(%ebp) | ||
+ | 80484d5: eb e4 jmp 80484bb <main+0x97> | ||
+ | 80484d7: 90 nop | ||
+ | 80484d8: 90 nop | ||
+ | 80484d9: 90 nop | ||
+ | 80484da: 90 nop | ||
+ | 80484db: 90 nop | ||
+ | 80484dc: 90 nop | ||
+ | 80484dd: 90 nop | ||
+ | 80484de: 90 nop | ||
+ | 80484df: 90 nop | ||
+ | </code> | ||
+ | |||
+ | ===Array element-fetching Cache Miss Test=== | ||
+ | We create a huge array and load elements from it. Unlike in the linked-list test, a cache miss here does not depend on the previous cache miss. | ||
+ | Again, we add nops to reduce the cache miss rate. Thus, the lower the cache miss rate, the lower the peak temperature. \\ | ||
+ | {{:analytical_modeling_of_system_temperature:arraymiss2.png?1000}} | ||
+ | <code> | ||
+ | 080483f4 <main>: | ||
+ | 80483f4: 8d 4c 24 04 lea 0x4(%esp),%ecx | ||
+ | 80483f8: 83 e4 f0 and $0xfffffff0,%esp | ||
+ | 80483fb: ff 71 fc pushl -0x4(%ecx) | ||
+ | 80483fe: 55 push %ebp | ||
+ | 80483ff: 89 e5 mov %esp,%ebp | ||
+ | 8048401: 51 push %ecx | ||
+ | 8048402: 83 ec 24 sub $0x24,%esp | ||
+ | 8048405: c7 44 24 04 04 00 00 movl $0x4,0x4(%esp) | ||
+ | 804840c: 00 | ||
+ | 804840d: c7 04 24 25 87 04 08 movl $0x8048725,(%esp) | ||
+ | 8048414: e8 13 ff ff ff call 804832c <printf@plt> | ||
+ | 8048419: c7 44 24 04 04 00 00 movl $0x4,0x4(%esp) | ||
+ | 8048420: 00 | ||
+ | 8048421: c7 04 24 00 00 90 01 movl $0x1900000,(%esp) | ||
+ | 8048428: e8 df fe ff ff call 804830c <calloc@plt> | ||
+ | 804842d: 89 45 f8 mov %eax,-0x8(%ebp) | ||
+ | 8048430: c7 45 f4 00 00 00 00 movl $0x0,-0xc(%ebp) | ||
+ | 8048437: eb 13 jmp 804844c <main+0x58> | ||
+ | 8048439: 8b 45 f4 mov -0xc(%ebp),%eax | ||
+ | 804843c: c1 e0 02 shl $0x2,%eax | ||
+ | 804843f: 03 45 f8 add -0x8(%ebp),%eax | ||
+ | 8048442: c7 00 10 00 00 00 movl $0x10,(%eax) | ||
+ | 8048448: 83 45 f4 01 addl $0x1,-0xc(%ebp) | ||
+ | 804844c: 81 7d f4 ff ff 8f 01 cmpl $0x18fffff,-0xc(%ebp) | ||
+ | 8048453: 7e e4 jle 8048439 <main+0x45> | ||
+ | 8048455: c7 45 f4 00 00 00 00 movl $0x0,-0xc(%ebp) | ||
+ | 804845c: e9 e8 01 00 00 jmp 8048649 <main+0x255> | ||
+ | 8048461: 8b 45 f4 mov -0xc(%ebp),%eax | ||
+ | 8048464: c1 e0 02 shl $0x2,%eax | ||
+ | 8048467: 03 45 f8 add -0x8(%ebp),%eax | ||
+ | 804846a: 8b 00 mov (%eax),%eax | ||
+ | 804846c: 83 45 f4 10 addl $0x10,-0xc(%ebp) | ||
+ | 8048470: 90 nop | ||
+ | 8048471: 90 nop | ||
+ | 8048472: 90 nop | ||
+ | |||
// ... lots of nops (addresses 8048473 through 8048644 omitted) to vary the cache miss rate ...
+ | |||
+ | 8048645: 90 nop | ||
+ | 8048646: 90 nop | ||
+ | 8048647: 90 nop | ||
+ | 8048648: 90 nop | ||
+ | 8048649: 81 7d f4 ff ff 8f 01 cmpl $0x18fffff,-0xc(%ebp) | ||
+ | 8048650: 0f 8e 0b fe ff ff jle 8048461 <main+0x6d> | ||
+ | 8048656: e9 fa fd ff ff jmp 8048455 <main+0x61> | ||
+ | 804865b: 90 nop | ||
+ | 804865c: 90 nop | ||
+ | 804865d: 90 nop | ||
+ | 804865e: 90 nop | ||
+ | 804865f: 90 nop | ||
+ | </code> | ||
+ | |||
+ | |||
+ | ===Array element-fetching Branch Misprediction Test=== | ||
+ | We wrap a branch condition around the array cache miss code. In the 'if' part, we fetch the next element; in the 'else' part, we fetch the ((array_size/2)+1)st element. Since the array is large, a branch misprediction fetches useless data. Thus, a higher branch misprediction rate means we have to flush more often (and therefore stall more). Results indicate that the higher the branch misprediction rate, the lower the peak temperature. Although this seems reasonable, the linked-list branch misprediction test suggested the opposite (not sure why).\\ | ||
+ | {{:analytical_modeling_of_system_temperature:arraymisp.png?1000}} | ||
+ | <code> | ||
+ | 08048454 <main>: | ||
+ | 8048454: 8d 4c 24 04 lea 0x4(%esp),%ecx | ||
+ | 8048458: 83 e4 f0 and $0xfffffff0,%esp | ||
+ | 804845b: ff 71 fc pushl -0x4(%ecx) | ||
+ | 804845e: 55 push %ebp | ||
+ | 804845f: 89 e5 mov %esp,%ebp | ||
+ | 8048461: 51 push %ecx | ||
+ | 8048462: 83 ec 34 sub $0x34,%esp | ||
+ | 8048465: c7 45 ec fd 5c 1e 6e movl $0x6e1e5cfd,-0x14(%ebp) | ||
+ | 804846c: c7 44 24 04 04 00 00 movl $0x4,0x4(%esp) | ||
+ | 8048473: 00 | ||
+ | 8048474: c7 04 24 00 00 90 01 movl $0x1900000,(%esp) | ||
+ | 804847b: e8 dc fe ff ff call 804835c <calloc@plt> | ||
+ | 8048480: 89 45 e8 mov %eax,-0x18(%ebp) | ||
+ | 8048483: c7 04 24 00 00 00 00 movl $0x0,(%esp) | ||
+ | 804848a: e8 ed fe ff ff call 804837c <time@plt> | ||
+ | 804848f: 89 04 24 mov %eax,(%esp) | ||
+ | 8048492: e8 a5 fe ff ff call 804833c <srand@plt> | ||
+ | 8048497: c7 45 f0 ef 03 00 00 movl $0x3ef,-0x10(%ebp) | ||
+ | 804849e: c7 45 f8 00 00 00 00 movl $0x0,-0x8(%ebp) | ||
+ | 80484a5: eb 31 jmp 80484d8 <main+0x84> | ||
+ | 80484a7: e8 e0 fe ff ff call 804838c <rand@plt> | ||
+ | 80484ac: 89 45 f4 mov %eax,-0xc(%ebp) | ||
+ | 80484af: 8b 45 f4 mov -0xc(%ebp),%eax | ||
+ | 80484b2: 3b 45 ec cmp -0x14(%ebp),%eax | ||
+ | 80484b5: 7d 0d jge 80484c4 <main+0x70> | ||
+ | 80484b7: 8b 45 f8 mov -0x8(%ebp),%eax | ||
+ | 80484ba: c1 e0 02 shl $0x2,%eax | ||
+ | 80484bd: 03 45 e8 add -0x18(%ebp),%eax | ||
+ | 80484c0: 8b 00 mov (%eax),%eax | ||
+ | 80484c2: eb 10 jmp 80484d4 <main+0x80> | ||
+ | 80484c4: 8b 45 f8 mov -0x8(%ebp),%eax | ||
+ | 80484c7: 05 00 00 c8 00 add $0xc80000,%eax | ||
+ | 80484cc: c1 e0 02 shl $0x2,%eax | ||
+ | 80484cf: 03 45 e8 add -0x18(%ebp),%eax | ||
+ | 80484d2: 8b 00 mov (%eax),%eax | ||
+ | 80484d4: 83 45 f8 10 addl $0x10,-0x8(%ebp) | ||
+ | 80484d8: 81 7d f8 ff ff c7 00 cmpl $0xc7ffff,-0x8(%ebp) | ||
+ | 80484df: 7e c6 jle 80484a7 <main+0x53> | ||
+ | 80484e1: eb bb jmp 804849e <main+0x4a> | ||
+ | 80484e3: 90 nop | ||
+ | 80484e4: 90 nop | ||
+ | 80484e5: 90 nop | ||
+ | 80484e6: 90 nop | ||
+ | 80484e7: 90 nop | ||
+ | 80484e8: 90 nop | ||
+ | 80484e9: 90 nop | ||
+ | 80484ea: 90 nop | ||
+ | 80484eb: 90 nop | ||
+ | 80484ec: 90 nop | ||
+ | 80484ed: 90 nop | ||
+ | 80484ee: 90 nop | ||
+ | 80484ef: 90 nop | ||
</code> | </code> |