; Hand this in to: ece849-staff+hw@ece.cmu.edu ; Required Readings @article{lamport78_time_clocks_ordering, author = "Leslie Lamport", title = "Time, Clocks, and the Ordering of Events in a Distributed System", organization = "Massachusetts Computer Associates, Inc", journal = "Communications of the ACM", year = "1978", volume = "12", number = "7", pages = "558--565", url = "http://www.ece.cmu.edu/~ece749/papers/lamport78_time_clocks_ordering.pdf", abstract = "The concept of one event happening before another in a distributed system is examined, and is shown to define a partial ordering of the events. A distributed algorithm is given for synchronizing a system of logical clocks which can be used to tetoally order the events. The use of the total ordering is illustrated with a method for solving synchroization problems. The algorithm is then specialized for synchronizing physical clocks, and a bound is derived on how far out of synchrony the clocks can become.", studentname = "", summary = "", contribution1 ="", contribution2 ="", contribution3 ="", contribution4 ="", contribution5 ="", weakness1 = "", weakness2 = "", weakness3 = "", weakness4 = "", weakness5 = "", interesting = "high/med/low", opinions = "", } @article{kopetz87_clock_sync, author = "Kopetz, H., & Ochsenreiter, W.", title = "Clock synchronization in distributed real time systems", journal = "IEEE Trans. Computers", year = "1987", volume = "36", number = "8", pages = "933--940", abstract = "The generation of a fault-tolerant global timebase with known accuracy of synchronization is one of the important operating system functions in a distributed real-time system. Depending on the types and number of tolerated faults, this paper presents upper bounds on the achievable synchronization accuracy for external and internal synchronization in a distributed real-time system. The concept of continuous versus instantaneous synchronization is introduced in order to generate a uniform common timebase for local, global, and external time measurements. In the last section, the functions of a VLSI clock synchronization unit, which improves the synchronization accuracy and reduces the CPU load, are described. With this unit, the CPU overhead and the network traffic for clock synchronization in state-of-the-art distributed real-time systems can be reduced to less than 1 percent.", studentname = "", summary = "", contribution1 ="", contribution2 ="", contribution3 ="", contribution4 ="", contribution5 ="", weakness1 = "", weakness2 = "", weakness3 = "", weakness4 = "", weakness5 = "", interesting = "high/med/low", opinions = "", } @InProceedings{ mills91_ntp, author = "D. L. Mills", title = "On the chronology and metrology of computer network timescales and their application to the Network Time Protocol", booktitle = "ACM Computer Communications Review", pages = "8--17", year = "1991", volume = "21", number = "5", month = "October", url = "http://www.eecis.udel.edu/~mills/database/papers/time.pdf", abstract = "This paper summarizes issues in computer network timekeeping with respect to the Network Time Protocol, which is used to synchronize time in many of the hosts and gateways of the Internet. It describes the methods used to coordinate and disseminate international time services and how they are incorporated into NTP time servers. It discusses the hazards on reckoning NTP dates with the conventional civil calendar and a standard method for numbering the days. Finally, the paper describes the NTP timescale, its mapping to conventional civil time and date and its interpretation of leap seconds.", studentname = "", summary = "", contribution1 ="", contribution2 ="", contribution3 ="", contribution4 ="", contribution5 ="", weakness1 = "", weakness2 = "", weakness3 = "", weakness4 = "", weakness5 = "", interesting = "high/med/low", opinions = "", } ; Pick one of the following @article{kopetz02_temporal_composeability, author = "Kopetz, H.; Obermaisser, R.", title = "Temporal composability", journal = "Computing & Control Engineering Journal ", year = "2002", volume = "13", number = "4", pages = "156--162", url = "http://ieeexplore.ieee.org/iel5/2218/22129/01029796.pdf", abstract = "The constructive design of dependable distributed real-time systems out of prevalidated components requires precise interface specifications of the components in the temporal domain and in the value domain. This paper focuses on the temporal specifiacation of interfaces in composable distributed real-time systems and establishes four principles of composability. It presents the temporal firewall interface that forms a fully specified operational interface of a componenet. The paper explains how the temporal firewall interface supports the four principles of composability. It then classifies interfaces from the point of view of composability and shows how these interfaces correspond to the time-triggered and event-triggered communications paradigm.", studentname = "", summary = "", contribution1 ="", contribution2 ="", contribution3 ="", contribution4 ="", contribution5 ="", weakness1 = "", weakness2 = "", weakness3 = "", weakness4 = "", weakness5 = "", interesting = "high/med/low", opinions = "", } @article{raynal96_capturing_causality, author = "Raynal, M., Singhal, M.", title = "Logical time: capturing causality in distributed systems", journal = "Computer", year = "1996", volume = "29", number = "2", pages = "49--56", url = "http://ieeexplore.ieee.org/iel1/2/10411/00485846.pdf", abstract = "Causality is vital in distributed computations. Distributed systems can determine causality using logical clocks. Human beings use the concept of causality to plan, schedule, and execute an enterprise, or to determine a plan's feasibility. In daily life, we use global time to deduce causality from loosely synchronized clocks such as wrist watches and wall clocks. But in distributed computing systems, the rate of event occurrence is several magnitudes higher, and the event-execution time several magnitudes smaller. If the physical clocks in these systems are not synchronized precisely the causality relation between events cannot be captured accurately. However, distributed systems have no built-in physical time and can only approximate it. This article presents a general framework of a system of logical clocks in distributed systems and discusses three methods: scalar, vector and matrix, for implementing logical time in these systems", studentname = "", summary = "", contribution1 ="", contribution2 ="", contribution3 ="", contribution4 ="", contribution5 ="", weakness1 = "", weakness2 = "", weakness3 = "", weakness4 = "", weakness5 = "", interesting = "high/med/low", opinions = "", } ; Supplemental Reading @Conference{Cheriton, author = "Cheriton, D.R. ; Skeen, D.", title = "Understanding the limitations of causally and totally ordered communication", inbook = "Operating Systems Review 27, no. 5, (Dec. 1993) : 44-57 Journal Code: Oper. Syst. Rev. (USA)", abstract = "Causally and totally ordered communication support (CATOCS) has been proposed as important to provide as part of the basic building blocks for constructing reliable distributed systems. In this paper, we identify four major limitations to CATOCS, investigate the applicability of CATOCS to several classes of distributed applications in light of these limitations, and the potential impact of these facilities on communication scalability and robustness. From this investigation, we find limited merit and several potential problems in using CATOCS. The fundamental difficulty with the CATOCS is that it attempts to solve state problems at the communication level in violation of the well-known end-to-end argument", url = "http://doi.acm.org/10.1145/168619.168623", studentname = "", summary = "", contribution1 ="", contribution2 ="", contribution3 ="", contribution4 ="", contribution5 ="", weakness1 = "", weakness2 = "", weakness3 = "", weakness4 = "", weakness5 = "", interesting = "high/med/low", opinions = "", } @article{Ramanathan90, author = "Ramanathan, P. ; Shin, K.G. ; Butler, R.W.", title = "Fault-tolerant clock synchronization in distributed systems", journal = "Computer 23,", year = "1990", pages = "33-42", number = "10", abstract = "Existing fault-tolerant clock synchronization algorithms are compared and contrasted. These include the following: software synchronization algorithms, such as convergence-averaging, convergence-nonaveraging, and consistency algorithms, as well as probabilistic synchronization; hardware synchronization algorithms; and hybrid synchronization. The worst-case clock skews guaranteed by representative algorithms are compared, along with other important aspects such as time, message, and cost overhead imposed by the algorithms. More recent developments such as hardware-assisted software synchronization and algorithms for synchronizing large, partially connected distributed systems are especially emphasized", url = "http://ieeexplore.ieee.org/iel1/2/2111/00058235.pdf", studentname = "", summary = "", contribution1 ="", contribution2 ="", contribution3 ="", contribution4 ="", contribution5 ="", weakness1 = "", weakness2 = "", weakness3 = "", weakness4 = "", weakness5 = "", interesting = "high/med/low", opinions = "", } @article{Netzer95, author = "Netzer, R.H.B. ; Jian Xu", title = "Necessary and sufficient conditions for consistent global snapshots", journal = "IEEE Transactions on Parallel and Distributed Systems 6,", year = "1995", pages = "165-9", number = "2", abstract = "Consistent global snapshots are important in many distributed applications. We prove the exact conditions for an arbitrary checkpoint, or a set of checkpoints, to belong to a consistent global snapshot, a previously open problem. To describe the conditions, we introduce a generalization of Lamport's (1978) happened-before relation called a zigzag path", url = "http://ieeexplore.ieee.org/iel4/71/8001/00342127.pdf", studentname = "", summary = "", contribution1 ="", contribution2 ="", contribution3 ="", contribution4 ="", contribution5 ="", weakness1 = "", weakness2 = "", weakness3 = "", weakness4 = "", weakness5 = "", interesting = "high/med/low", opinions = "", } @article{Palumbo94, author = "Palumbo, D.L.", title = "The derivation and experimental verification of clock synchronization theory", journal = "IEEE Transactions on Computers 43,", year = "1994", pages = "676-86", number = "6", abstract = "The objective of this work is to validate mathematically derived clock synchronization theories and their associated algorithms through experiment. Two theories are considered, the Interactive Convergence Clock Synchronization Algorithm and the Mid-Point Algorithm. Special clock circuitry was designed and built so that several operating conditions and failure modes (including malicious failures) could be tested. Both theories are shown to predict conservative upper bounds (i.e., measured values of clock skew were always less than the theory prediction). Insight gained during experimentation led to alternative derivations of the theories. These new theories accurately predict the clock system's behavior. It is found that a 100\% penalty is paid to tolerate worst case failures. It is also shown that under optimal conditions (with minimum error and no failures) the clock skew can be as much as 3 clock ticks. Clock skew grows to 6 clock ticks when failures are present. Finally, it is concluded that one cannot rely solely on test procedures or theoretical analysis to predict worst case conditions", url = "http://ieeexplore.ieee.org/iel1/12/7122/00286301.pdf", studentname = "", summary = "", contribution1 ="", contribution2 ="", contribution3 ="", contribution4 ="", contribution5 ="", weakness1 = "", weakness2 = "", weakness3 = "", weakness4 = "", weakness5 = "", interesting = "high/med/low", opinions = "", } @article{Vasanthavada88, author = "Vasanthavada, N. ; Marinos, P.N.", title = "Synchronization of fault-tolerant clocks in the presence of malicious failures", journal = "IEEE Transactions on Computers 37,", year = "1988", pages = "440-8", number = "4", abstract = "The problem of achieving global clock synchronization in fault-tolerant clocks by preventing so-called multiple cliques in the presence of malicious clock failures (i.e. clock failures that are perceived differently by different nonfaulty clocks) is addressed. A solution to the problem, referred to as the averaging rule, is developed, and its use is analytically justified using the notions of clock partitions and generalized clock partitions. Experimental characterization of the multiple cliques problem has been undertaken, and certain conditions that induce their occurrence in practical hardware implementation are identified. The effects of clock-receiver triggering variations and phase-detector operating range on the instantaneous frequencies of the clock modules are investigated. The efficacy of the averaging rule is established not only by analysis but also by means of simulations and experimentation with hardware clock implementations", url = "http://ieeexplore.ieee.org/iel1/12/134/00002188.pdf", studentname = "", summary = "", contribution1 ="", contribution2 ="", contribution3 ="", contribution4 ="", contribution5 ="", weakness1 = "", weakness2 = "", weakness3 = "", weakness4 = "", weakness5 = "", interesting = "high/med/low", opinions = "", } @article{Shin87, author = "Shin, K.G. ; Ramanathan, P.", title = "Clock synchronization of a large multiprocessor system in the presence of malicious faults", journal = "IEEE Transactions on Computers C-36,", year = "1987", pages = "2-12", number = "1", abstract = "A hardware synchronization algorithm is proposed that can be used to synchronize all the clocks in a multiprocessor system of any arbitrary size (small or large). It provides an optimum tradeoff between the total number of interconnections and the fault tolerance of the system, while maintaining the symmetry of the network. Given any desired fault tolerance specification, this algorithm can be used to determine the network architecture that uses a near-minimal number of interconnections as well as a clock network very similar to the processor network. The use of this algorithm could reduce the total number of interconnections by 80\%, while causing little or no difference in the synchronizing capabilities. Consequently, it has a high potential for synchronizing large multiprocessor systems", url = "http://www.ece.cmu.edu/~ece749/papers/shin87_clock_sync_malicious_faults.pdf", studentname = "", summary = "", contribution1 ="", contribution2 ="", contribution3 ="", contribution4 ="", contribution5 ="", weakness1 = "", weakness2 = "", weakness3 = "", weakness4 = "", weakness5 = "", interesting = "high/med/low", opinions = "", }