; Hand this in to: ece849-staff+hw (at) ece.cmu.edu
; (The address is written with "(at)" on purpose: BibTeX treats a literal
;  at-sign anywhere outside an entry as the start of a new entry.)
;
; How to use this file: fill in the empty fields (studentname, summary,
; contribution1-5, weakness1-5, opinions) for each reading, and reduce
; "interesting" to one of high, med or low. Fields that standard
; bibliography styles do not know are ignored by BibTeX.

; Required Readings

; NOTE: the BART reading is a part of the specification for the San Francisco
;       subway/rail system. Please comment on what you think about the
;       technical issues it presents (and leaves out), not its merits
;       as a scholarly paper (which it obviously isn't).
;       Do you think it is a good spec? Is it obviously missing out on things
;       within the scope of critical system requirements?

@misc{bart81_safety_spec.pdf,
  key           = "BART",
  title         = "{BART} ({San Francisco Bay Area Rapid Transit District}), System Safety specification",
  year          = "1981",
  url           = "http://www.ece.cmu.edu/~ece849/papers/bart81_safety_spec.pdf",
  studentname   = "",
  summary       = "",
  contribution1 = "",
  contribution2 = "",
  contribution3 = "",
  contribution4 = "",
  contribution5 = "",
  weakness1     = "",
  weakness2     = "",
  weakness3     = "",
  weakness4     = "",
  weakness5     = "",
  interesting   = "high/med/low",
  opinions      = "",
}

; NOTE(review): the venue fields below describe the Dec. 1991 SIGSOFT
; printing, while the citation key says 93 and the URL is an IEEE Xplore
; link -- confirm which version of the paper students are meant to read.

@article{butler93_infeasibility_quantifying,
  author        = "Butler, R. W. and Finelli, G. B.",
  title         = "The infeasibility of experimental quantification of life-critical software reliability",
  journal       = "SIGSOFT Software Engineering Notes",
  volume        = "16",
  number        = "5",
  month         = dec,
  year          = "1991",
  pages         = "66--76",
  abstract      = "It is affirmed that quantification of life-critical
                   software reliability is infeasible using statistical
                   methods, whether applied to standard software or
                   fault-tolerant software. The key assumption of software
                   fault tolerance-separately programmed versions fail
                   independently-is shown to be problematic. This assumption
                   cannot be justified by experimentation in the
                   ultrareliability region and subjective arguments in its
                   favor are not sufficiently strong to justify it as an
                   axiom. Also, the implications of multiversion software
                   experiments support this affirmation",
  url           = "http://ieeexplore.ieee.org/iel1/32/5456/00210303.pdf",
  studentname   = "",
  summary       = "",
  contribution1 = "",
  contribution2 = "",
  contribution3 = "",
  contribution4 = "",
  contribution5 = "",
  weakness1     = "",
  weakness2     = "",
  weakness3     = "",
  weakness4     = "",
  weakness5     = "",
  interesting   = "high/med/low",
  opinions      = "",
}

@article{myers86_sdi_error_free.pdf,
  author        = "Myers, W.",
  title         = "Can software for the {Strategic Defense Initiative} ever be error-free?",
  journal       = "Computer",
  volume        = "19",
  number        = "11",
  year          = "1986",
  pages         = "61--67",
  url           = "http://www.ece.cmu.edu/~ece849/papers/myers86_sdi_error_free.pdf",
  studentname   = "",
  summary       = "",
  contribution1 = "",
  contribution2 = "",
  contribution3 = "",
  contribution4 = "",
  contribution5 = "",
  weakness1     = "",
  weakness2     = "",
  weakness3     = "",
  weakness4     = "",
  weakness5     = "",
  interesting   = "high/med/low",
  opinions      = "",
}

; Supplemental Readings

@article{littlewood93_validation,
  author        = "Littlewood, Bev and Strigini, Lorenzo",
  title         = "Validation of ultrahigh dependability for software-based systems",
  journal       = "Communications of the ACM",
  volume        = "36",
  number        = "11",
  year          = "1993",
  pages         = "69--80",
  url           = "http://doi.acm.org/10.1145/163359.163373",
  studentname   = "",
  summary       = "",
  contribution1 = "",
  contribution2 = "",
  contribution3 = "",
  contribution4 = "",
  contribution5 = "",
  weakness1     = "",
  weakness2     = "",
  weakness3     = "",
  weakness4     = "",
  weakness5     = "",
  interesting   = "high/med/low",
  opinions      = "",
}

@article{Lala91,
  author        = "Lala, J. H. and Harper, R. E. and Alger, L. S.",
  title         = "A design approach for ultrareliable real-time systems",
  journal       = "Computer",
  volume        = "24",
  number        = "5",
  year          = "1991",
  pages         = "12--22",
  abstract      = "A design approach developed over the past few years to
                   formalize redundancy management and validation is
                   described. Redundant elements are partitioned into
                   individual fault-containment regions (FCRs). An FCR is a
                   collection of components that operates correctly
                   regardless of any arbitrary logical or electrical fault
                   outside the region. Conversely, a fault in an FCR cannot
                   cause hardware outside the region to fail. The outputs of
                   all channels are required to agree bit-for-bit under
                   no-fault conditions (exact bitwise consensus).
                   Synchronization, input agreement, and input validity
                   conditions are discussed. The Advanced Information
                   Processing System (AIPS), which is a fault-tolerant
                   distributed architecture based on this approach, is
                   described. A brief overview of recent applications of
                   these systems and current research is presented",
  url           = "http://ieeexplore.ieee.org/iel1/2/2542/00076283.pdf",
  studentname   = "",
  summary       = "",
  contribution1 = "",
  contribution2 = "",
  contribution3 = "",
  contribution4 = "",
  contribution5 = "",
  weakness1     = "",
  weakness2     = "",
  weakness3     = "",
  weakness4     = "",
  weakness5     = "",
  interesting   = "high/med/low",
  opinions      = "",
}

; NOTE(review): this URL lives under ~ece749 whereas the other course-hosted
; papers in this file are under ~ece849 -- confirm the link is intended.

@article{Brooks87,
  author        = "Brooks, Jr., F. P.",
  title         = "No silver bullet; essence and accidents of software engineering",
  journal       = "Computer",
  volume        = "20",
  number        = "4",
  year          = "1987",
  pages         = "10--19",
  abstract      = "The author considers the reasons why there is no single
                   development, in either technology or in management
                   technique, that by itself promises even one
                   order-of-magnitude improvement in productivity,
                   reliability, and simplicity of software. He does this by
                   examining both the nature of the software problem and the
                   properties of the solutions posed",
  url           = "http://www.ece.cmu.edu/~ece749/papers/brooks87_no_silver_bullet.pdf",
  studentname   = "",
  summary       = "",
  contribution1 = "",
  contribution2 = "",
  contribution3 = "",
  contribution4 = "",
  contribution5 = "",
  weakness1     = "",
  weakness2     = "",
  weakness3     = "",
  weakness4     = "",
  weakness5     = "",
  interesting   = "high/med/low",
  opinions      = "",
}

@article{Rushby94,
  author        = "Rushby, J.",
  title         = "Critical system properties: survey and taxonomy",
  journal       = "Reliability Engineering \& System Safety",
  volume        = "43",
  number        = "2",
  year          = "1994",
  pages         = "189--219",
  abstract      = "Computer systems are increasingly employed in
                   circumstances where their failure (or even their correct
                   operation, if they are built to flawed requirements) can
                   have serious consequences. There is a surprising diversity
                   of opinion concerning the properties that such `critical
                   systems' should possess, and the best methods to develop
                   them. The dependability approach grew out of the tradition
                   of ultra-reliable and fault-tolerant systems, while the
                   safety approach grew out of the tradition of hazard
                   analysis and system safety engineering. Yet another
                   tradition is found in the security community, and there
                   are further specialized approaches in the tradition of
                   real-time systems. The critical properties considered in
                   each approach, and the techniques that have been developed
                   to specify them and to ensure their satisfaction are
                   examined. Since systems are now being constructed that
                   must satisfy several of these critical system properties
                   simultaneously, there is particular interest in the extent
                   to which techniques from one tradition support or conflict
                   with those of another, and in whether certain critical
                   system properties are fundamentally compatible or
                   incompatible with each other. As a step toward improved
                   understanding of these issues, it is suggested that a
                   taxonomy, based on Perrow's analysis (Perrow, C. Normal
                   Accidents: Living with High Risk Technologies. Basic
                   Books, New York, 1984), that considers the complexity of
                   component interactions and tightness",
  url           = "http://citeseer.nj.nec.com/rushby94critical.html",
  studentname   = "",
  summary       = "",
  contribution1 = "",
  contribution2 = "",
  contribution3 = "",
  contribution4 = "",
  contribution5 = "",
  weakness1     = "",
  weakness2     = "",
  weakness3     = "",
  weakness4     = "",
  weakness5     = "",
  interesting   = "high/med/low",
  opinions      = "",
}

@article{Saltzer84,
  author        = "Saltzer, J. H. and Reed, D. P. and Clark, D. D.",
  title         = "End-to-end arguments in system design",
  journal       = "ACM Transactions on Computer Systems",
  volume        = "2",
  number        = "4",
  year          = "1984",
  pages         = "277--288",
  abstract      = "This paper presents a design principle that helps guide
                   placement of functions among the modules of a distributed
                   computer system. The principle, called the end-to-end
                   argument, suggests that functions placed at low levels of
                   a system may be redundant or of little value when compared
                   with the cost of providing them at that low level.
                   Examples discussed in the paper include bit-error
                   recovery, security using encryption, duplicate message
                   suppression, recovery from system crashes, and delivery
                   acknowledgment. Low-level mechanisms to support these
                   functions are justified only as performance enhancements",
  url           = "http://doi.acm.org/10.1145/357401.357402",
  studentname   = "",
  summary       = "",
  contribution1 = "",
  contribution2 = "",
  contribution3 = "",
  contribution4 = "",
  contribution5 = "",
  weakness1     = "",
  weakness2     = "",
  weakness3     = "",
  weakness4     = "",
  weakness5     = "",
  interesting   = "high/med/low",
  opinions      = "",
}