%%% Please DO NOT EDIT this file - it was automatically generated. %%% Technical reports from the Department of Information Technology, %%% Uppsala University, Sweden. %%% Series ISSN 1404-3203 (1404-0603 up to and including 1999-006). %%% %%% ==================================================================== %%% BibTeX-file{ %%% author = "Bj{\"o}rn Victor", %%% date = "20 Mar 2024", %%% filename = "itreports.bib", %%% url = "http://www.it.uu.se/research/publications/reports/itreports.bib", %%% www-home = "http://www.it.uu.se/research/publications/reports/", %%% address = "Department of Information Technology, %%% Uppsala University, %%% Box 337, %%% SE-751 05 Uppsala, SWEDEN", %%% telephone = "+46 18 471 0000", %%% FAX = "+46 18 511925", %%% email = "Bjorn.Victor at it.uu.se", %%% dates = {1999--}, %%% keywords = "", %%% supported = "yes", %%% supported-by = "Bj{\"o}rn Victor", %%% abstract = "Technical reports from the Department of %%% Information Technology at Uppsala University" %%% } %%% ==================================================================== @STRING{cb = "Centre for Image Analysis" } @STRING{csd = "Computing Science Division" } @STRING{docs = "Division of Computer Systems" } @STRING{hci = "Division of Human-Computer Interaction" } @STRING{it = "Department of Information Technology, Uppsala University" } @STRING{mdi = "Division of Human-Computer Interaction" } @STRING{syscon = "Division of Systems and Control" } @STRING{tdb = "Division of Scientific Computing" } @STRING{vi2 = "Division of Visual Information and Interaction" } @STRING{vi3 = "Division Vi3" } @TechReport{ it:2024-001, author = {Thom Kunkeler and Aletta Nyl{\'e}n}, title = {Supplement - Capital in Computing Education: Investigating Factors Underlying Participation}, institution = it, department = vi3, year = 2024, number = {2024-001}, month = mar, abstract = {This document provides the supplementing material for the following publication: Thom Kunkeler and Aletta Nyl{\'e}n. 
Capital in Computing Education: Investigating Factors Underlying Participation. 2024. In Proceedings of the 2024 Conference on Innovation and Technology in Computer Science Education (Milan, Italy, 2024-07-08) (ITiCSE'24). In this publication, we developed a validated survey instrument to measure capital in computing education. Capital refers to the legitimate, valuable and exchangeable resources that individuals use to generate social advantage within specific fields. In computing education, a theoretical model has been developed highlighting the forms of capital which influence participation and success in the field. This study assessed the theoretical model through careful survey design and Confirmatory Factor Analysis (CFA). The hypothesised survey structure was assessed in terms of model fit to the observed data, and adjusted to achieve a survey with high internal consistency among the items and factors (robust: $X^2p$ = 0.119; CFI/TLI = 0.97/0.95; RMSEA = 0.06, SRMR = 0.041). This document contains a detailed presentation of the pre- and post-validated survey instrument, in addition to the factor analysis diagram.} } @TechReport{ it:2023-003, author = {Torsten S{\"o}derstr{\"o}m}, title = {Relations Between Prediction Error and Maximum Likelihood Methods in an Error-in-Variables Setting. Extended version with full proofs}, institution = it, year = 2023, department = syscon, number = {2023-003}, month = oct, abstract = {Prediction error (PE) and maximum likelihood (ML) methods are often treated as synonyms when identifying linear dynamic systems from Gaussian data. It is shown how these methods differ when specifically dealing with errors-in-variables problems. These problems can be modeled using multivariable time series with a specific internal structure. In such situations the ML estimates have lower variances than the PE estimates. Explicit expressions for the covariance matrices of the estimates are given and analyzed. 
For the special case when the unperturbed input is white noise it is shown that the PE estimate is not identifiable, while the ML estimates still have quite small variances. Another special case concerns non-Gaussian data. In that case a pseudo-ML estimate (using the ML criterion as if the data were Gaussian) will no longer be superior to the PE estimate in terms of error variances.} } @TechReport{ it:2023-002, author = {Ginevra Castellano and Gunilla Kreiss and Robin Strand and von Sydow, Lina}, title = {Using gender equality indicators to support gender mainstreaming work at the {D}epartment of {I}nformation {T}echnology}, institution = it, year = 2023, number = {2023-002}, month = apr, abstract = {Previous research has shown that gender statistics can be a powerful tool to raise organizational awareness of gender issues. This report presents the results of a project that investigated how Uppsala University's gender equality indicators can be used to monitor the gender distribution of research resources and funding at the Department of Information Technology and how they can be used in a long-term perspective to improve gender mainstreaming work at the department. Results show that gender differences exist and they are sometimes in favour of females and sometimes in favour of males. 
This analysis raises several questions of relevance to future gender mainstreaming work at the department.} } @TechReport{ it:2023-001, author = {Ivo Dravins and Maya Neytcheva}, title = {Preconditioning of Discrete State- and Control-Constrained Optimal Control Convection-Diffusion Problems}, institution = it, department = tdb, year = 2023, number = {2023-001}, month = feb, note = {This is an extensively updated version of Technical Report nr 2021-003.}, abstract = {We consider the iterative solution of algebraic systems, arising in optimal control problems, constrained by a partial differential equation, with additional box constraints on the state and the control variables, and sparsity imposed on the control. A nonsymmetric two-by-two block preconditioner is analysed and tested for a wide range of problem, regularization and discretization parameters. The constraint equation characterizes convection-diffusion processes.} } @TechReport{ it:2022-009, author = {Torsten S{\"o}derstr{\"o}m and Umberto Soverini}, title = {Analyzing the Parameter Bias when an {ARMAX} Model is Fitted to Noise-Corrupted Data}, institution = it, department = syscon, year = 2022, number = {2022-009}, month = oct, abstract = {When an ARMAX model is fitted to noise-corrupted data using the prediction error method, biased estimates are obtained. The bias is examined, with emphasis on the situation when the system is almost non-identifiable. In contrast to the case of using an output error model, no general results on the size of the bias seem to apply.} } @TechReport{ it:2022-008, author = {Torsten S{\"o}derstr{\"o}m and Umberto Soverini}, title = {Analyzing the Parameter Bias when an Instrumental Variable Method is Used with Noise-Corrupted Data}, institution = it, department = syscon, year = 2022, number = {2022-008}, month = oct, abstract = {When an output error model is fitted to data with noise-corrupted inputs using a prediction error method, a bias occurs. 
It was previously shown that the bias is of order $O(1/\delta)$ for a small pole-zero separation $\delta$. These notes examine the same problem when an instrumental variable model is fitted. A similar result is shown to hold for the instrumental variable case.} } @TechReport{ it:2022-007, author = {Gustaf Borgstr{\"o}m and Christian Rohner and David Black-Schaffer}, title = {Faster Functional Warming with Cache Merging}, institution = it, department = docs, year = 2022, number = {2022-007}, note = {Updated 2022-09-07, correcting spelling errors.}, month = aug, abstract = {SMARTS-like sampled hardware simulation techniques achieve good accuracy by simulating many small portions of an application in detail. However, while this reduces the detailed simulation time, it results in extensive cache warming times, as each of the many simulation points requires warming the whole memory hierarchy. Adaptive Cache Warming reduces this time by iteratively increasing warming until achieving sufficient accuracy. Unfortunately, each time the warming increases, the previous warming must be redone, nearly doubling the required warming. We address re-warming by developing a technique to merge the cache states from the previous and additional warming iterations. We demonstrate our merging approach on a multi-level LRU cache hierarchy and evaluate and address the introduced errors. By removing warming redundancy, we expect an ideal 2x warming speedup when using our Cache Merging solution together with Adaptive Cache Warming. Experiments show that Cache Merging delivers an average speedup of 1.44x, 1.84x, and 1.87x for 128kB, 2MB, and 8MB L2 caches, respectively, with 95-percentile absolute IPC errors of only 0.029, 0.015, and 0.006, respectively. 
These results demonstrate that Cache Merging yields significantly higher simulation speed with minimal losses.} } @TechReport{ it:2022-006, author = {Ruoqi Zhang and Per Mattsson and Torbj{\"o}rn Wigren}, title = {A Robust Multi-Goal Exploration Aided Tracking Policy}, institution = it, department = tdb, year = 2022, number = {2022-006}, month = jun, abstract = {Set-point control aims at finding a policy that can track a set point that varies over time. Such control objectives are central in industry, yet multi-goal Reinforcement Learning methods are typically evaluated on other environments. The paper therefore proposes the use of a combination of feedback based amplitude aided exploration, simulated ensemble model training, together with policy optimization also over integrated errors, to arrive at a trained multi-goal policy that can be directly deployed to real-world nonlinear set-point control systems. The claim is supported by experiments with a real-world nonlinear cascaded tank process and a simulated strongly non-linear pH-control system.} } @TechReport{ it:2022-005, author = {Camille Clouard and Carl Nettelblad}, title = {Consistency Study of a Reconstructed Genotype Probability Distribution via Clustered Bootstrapping in {NORB} Pooling Blocks}, institution = it, department = tdb, year = 2022, number = {2022-005}, month = jun, abstract = {For applications with biallelic genetic markers, group testing techniques, synonymous to pooling techniques, are usually applied for decreasing the cost of large-scale testing as e.g. when detecting carriers of rare genetic variants. In some configurations, the results of the grouped tests cannot be decoded and the pooled items are missing. Inference of these missing items can be performed with specific statistical methods that are for example related to the Expectation-Maximization algorithm. Pooling has also been applied for determining the genotype of markers in large populations. 
The particularity of full genotype data for diploid organisms in the context of group testing are the ternary outcomes (two homozygous genotypes and one heterozygous), as well as the distribution of these three outcomes in a population, which is often ruled by the Hardy-Weinberg Equilibrium and depends on the allele frequency in such situation. When using a nonoverlapping repeated block pooling design, the missing items are only observed in particular arrangements. Overall, a data set of pooled genotypes can be described as an inference problem in Missing Not At Random data with nonmonotone missingness patterns. This study presents a preliminary investigation of the consistency of various iterative methods estimating the most likely genotype probabilities of the missing items in pooled data. We use the Kullback-Leibler divergence and the L2 distance between the genotype distribution computed from our estimates and a simulated empirical distribution as a measure of the distributional consistency. } } @TechReport{ it:2022-004, author = {Owe Axelsson and Ivo Dravins and Maya Neytcheva}, title = {Stage-Parallel Preconditioners for Implicit {R}unge-{K}utta Methods of Arbitrary High Order. Linear problems}, institution = it, department = tdb, year = 2022, number = {2022-004}, month = apr, abstract = {Fully implicit Runge-Kutta methods offer the possibility to use high order accurate time discretization to match space discretization accuracy, an issue of significant importance for many large scale problems of current interest, where we may have fine space resolution with many millions of spatial degrees of freedom and long time intervals. In this work we consider strongly A-stable implicit Runge-Kutta methods of arbitrary order of accuracy, based on Radau quadratures. For the arising large algebraic systems we introduce an efficient preconditioner, that allows for fully stage-parallel solution. 
We analyse the spectrum of the corresponding preconditioned system and illustrate the performance of the solution method with numerical experiments using MPI. In this work we consider only linear problems. } } @TechReport{ it:2022-003, author = {Ken Mattsson and Ylva Ljungberg Rydin}, title = {Implicit Summation by Parts Operators for Finite Difference Approximations of First and Second Derivatives}, institution = it, department = tdb, year = 2022, number = {2022-003}, month = jan, abstract = {Implicit finite difference approximations are derived for both the first and second derivatives. The boundary closures are based on the banded-norm summation-by-parts framework and the boundary conditions are imposed using a weak (penalty) enforcement. Up to 8th order global convergence is achieved. The finite difference approximations lead to implicit ODE systems. Spectral resolution characteristics are achieved by proper tuning of the internal difference stencils. The accuracy and stability properties are demonstrated for linear hyperbolic problems in 1D and the 2D compressible Euler equations.} } @TechReport{ it:2022-002, author = {Torbj{\"o}rn Wigren}, title = {{MATLAB} Software for Nonlinear and Delayed Recursive Identification - Revision 2}, institution = it, department = tdb, year = 2022, number = {2022-002}, month = jan, note = {Revised version of nr 2017-007. The software package (last updated 2022-09-12) can be downloaded from \url{https://www.it.uu.se/research/publications/reports/2022-002/RecursiveNonlinearNetworkedIdentificationSW-r3.zip}.} , abstract = {This report is the user's manual for a package of MATLAB scripts and functions, developed for recursive prediction error identification of nonlinear state space systems. The identified state space model incorporates delay, which allows a treatment of general nonlinear networked identification, as well as of general nonlinear systems with delay. 
The core of the package is an implementation of two output error identification algorithms. The algorithms are based on a continuous time, structured black box state space model of a nonlinear system. The present revision adds a new algorithm, where also the output is determined via a parameterized measurement equation in the states and inputs. The software can only be run off-line, i.e. no true real time operation is possible. The algorithms are however implemented so that true on-line operation can be obtained by extraction of the main algorithmic loop. The user must then provide the real time environment. The software package contains scripts and functions that allow the user to either input live measurements or to generate test data by simulation. The scripts and functions for the setup and execution of the identification algorithms are somewhat more general than what is described in the references. The functionality for display of results include scripts for plotting of e.g. data, parameters, prediction errors, eigenvalues and the condition number of the Hessian. The estimated model obtained at the end of a run can be simulated and the model output plotted, alone or together with the data used for identification. Model validation is supported by two methods apart from the display functionality. First, a calculation of the RPEM loss function can be performed, using parameters obtained at the end of an identification run. Secondly, the accuracy as a function of the output signal amplitude can be assessed.} } @TechReport{ it:2022-001, author = {Diane Golay and {\AA}sa Cajander}, title = {Sjuksk{\"o}terskors upplevelse av att jobba med IT-system: sammanfattning}, institution = it, department = vi2, year = 2022, number = {2022-001}, month = jan, note = {In Swedish.}, abstract = {Denna rapport sammanfattar resultaten fr{\aa}n en kvalitativ studie om sjuksk{\"o}terskors upplevelse av IT-system p{\aa} jobbet. 
De k{\"a}nslor och uppfattningar som sjuksk{\"o}terskor upplevde i samband med IT-anv{\"a}ndning p{\aa} jobbet presenteras och implikationerna f{\"o}r design och implementering av IT-system och IT-st{\"o}dda processer i sjukhusmilj{\"o} diskuteras.} } @TechReport{ it:2021-008, author = {Torbj{\"o}rn Wigren}, title = {{MATLAB} Software for Recursive Identification and Scaling Using a Structured Nonlinear Black-box Model - Revision 7}, institution = it, department = tdb, year = 2021, number = {2021-008}, month = dec, note = {Revised version of nr 2010-022. The software package can be downloaded from \url{http://www.it.uu.se/research/publications/reports/2021-008/NRISSoftwareRev7.zip}.} , abstract = {This report is intended as a user's manual for a package of MATLAB scripts and functions, developed for recursive prediction error identification of nonlinear state space systems and nonlinear static systems. The core of the package is the implementation of three output error identification and scaling algorithms. The first algorithm is based on a continuous time, structured black box state space model of a nonlinear system. An RPEM algorithm for recursive identification of nonlinear static systems, that re-uses the parameterization of the nonlinear ODE model, is also included in the software package. The present revision adds a third algorithm, where also the output is determined via a parameterized measurement equation in the states and inputs. The software can only be run off-line, i.e. no true real time operation is possible. The algorithm is however implemented so that true on-line operation can be obtained by extraction of the main algorithmic loop. The user must then provide the real time environment. The software package contains scripts and functions that allow the user to either input live measurements or to generate test data by simulation. 
The scripts and functions for the setup and execution of the identification algorithms are somewhat more general than what is described in the references. There is e.g. support for automatic re-initiation of the algorithms using the parameters obtained at the end of a previous identification run. This allows for multiple runs through a set of data, something that is useful for data sets that are too short to allow complete convergence. The re-initiation step also allows the user to modify the degrees of the polynomial model structure and to specify terms that are to be excluded from the model. This makes it possible to iteratively refine the estimated model using multiple runs. The functionality for display of results include scripts for plotting of data, parameters, prediction errors, eigenvalues and the condition number of the Hessian. The estimated model obtained at the end of a run can be simulated and the model output plotted, alone or together with the data used for identification. Model validation is supported by two methods apart from the display functionality. First, calculation of the RPEM loss function can be performed, using parameters obtained at the end of an identification run. Secondly, the accuracy as a function of the output signal amplitude can be assessed.} } @TechReport{ it:2021-007, author = {Giovanni Barbarino and Melker Claesson and Sven-Erik Ekstr{\"o}m and Carlo Garoni and David Meadon and Hendrik Speleers}, title = {Matrix-Less Eigensolver for Large Structured Matrices}, institution = it, department = tdb, year = 2021, number = {2021-007}, month = nov, abstract = {Sequences of structured matrices of increasing size arise in many scientific applications and especially in the numerical discretization of linear differential problems. We assume as a working hypothesis that the eigenvalues of a matrix $X_n$ belonging to a sequence of this kind are given by a regular expansion. 
Based on this working hypothesis, which is illustrated to be plausible through numerical experiments, we propose an eigensolver for the computation of the eigenvalues of $X_n$ for large $n$ and we provide a theoretical analysis of its convergence. The eigensolver is called matrix-less because it does not operate on the matrix $X_n$ but on a few similar matrices of smaller size combined with an interpolation-extrapolation strategy. Its performance is benchmarked on several numerical examples, with a special focus on matrices arising from the discretization of differential problems.}, note = {Updated version of nr 2021-005.} } @TechReport{ it:2021-006, author = {Torsten S{\"o}derstr{\"o}m and Umberto Soverini}, title = {When are Errors-in-Variables Aspects Particularly Important to Consider in System Identification?}, institution = it, department = syscon, year = 2021, number = {2021-006}, month = sep, abstract = {When recorded signals are corrupted by noise on both input and output sides, all standard identification methods give biased parameter estimates, due to the presence of input noise. This report discusses in what situations such a bias is large and, consequently, when the errors-in-variables identification methods are to be preferred. } } @TechReport{ it:2021-005, author = {Giovanni Barbarino and Melker Claesson and Sven-Erik Ekstr{\"o}m and Carlo Garoni and David Meadon}, title = {Matrix-Less Eigensolver for Large Structured Matrices}, institution = it, department = tdb, year = 2021, number = {2021-005}, month = aug, abstract = {Sequences of structured matrices of increasing size arise in many scientific applications and especially in the numerical discretization of linear differential problems. We assume as a working hypothesis that the eigenvalues of a matrix $X_n$ belonging to a sequence of this kind are given by a regular expansion. 
Based on the working hypothesis, which is proved to be plausible through numerical experiments, we propose an eigensolver for the computation of the eigenvalues of $X_n$ for large~$n$. The performance of the eigensolver---which is called matrix-less because it does not operate on the matrix $X_n$---is illustrated on several numerical examples, with a special focus on matrices arising from the discretization of differential problems, and turns out to be quite satisfactory in all cases. In a sense, this is an a posteriori proof of the reasonableness of the working hypothesis as well as a testimony of the fact that the spectra of large structured matrices are much more ``regular'' than one might expect.}, note = {Updated 2021-09-03 and 2021-09-08.} } @TechReport{ it:2021-004, author = {Minna Salminen Karlsson}, title = {IT-system i sjuksk{\"o}terskors arbetsmilj{\"o}. Att t{\"a}nka p{\aa} vid upphandling och inf{\"o}rande}, institution = it, year = 2021, number = {2021-004}, month = apr, note = {In Swedish.}, abstract = {Rapporten visar, utifr{\aa}n en beskrivning av sjuksk{\"o}terskors arbete, vilka aspekter beh{\"o}ver tas h{\"a}nsyn till vid fortsatt digitalisering av slutenv{\aa}rd. Sjuksk{\"o}terskornas arbetsmilj{\"o} st{\aa}r i fokus. Syftet {\"a}r att {\"o}ka f{\"o}rst{\aa}elsen f{\"o}r s{\aa}dana s{\"a}rskilda aspekter i sjuksk{\"o}terskors arbete som har h{\"o}g relevans f{\"o}r IT-inf{\"o}rande. Utifr{\aa}n denna f{\"o}rst{\aa}else pekar rapporten p{\aa} s{\aa}dant som b{\"o}r skrivas in i kravspecifikationer och {\aa}tg{\"a}rdas vid inf{\"o}rande, f{\"o}r att IT-systemen i verkligheten skulle {\"o}ka v{\aa}rdens effektivitet, kvalitet och patients{\"a}kerhet, utan att f{\"o}rs{\"a}mra sjuksk{\"o}terskors fysiska och psykiska arbetsmilj{\"o}. Rapporten b{\"o}rjar med en generell beskrivning av arbetsmilj{\"o} f{\"o}r att fokusera p{\aa} sjuksk{\"o}terskors arbetsmilj{\"o} och ytterligare p{\aa} sjuksk{\"o}terskors anv{\"a}ndning av olika slags information. 
Grundl{\"a}ggande krav f{\"o}r v{\"a}lfungerade system r{\"a}knas upp, liksom vanliga problem i de nuvarande systemen. Rapporten pekar p{\aa} vissa arbetsuppgifter som sv{\aa}rligen kan digitaliseras. Kostnader som ett inf{\"o}rande f{\"o}r med sig och som ofta {\"a}r dolda ber{\"o}rs ocks{\aa}.} } @TechReport{ it:2021-003, author = {Ivo Dravins and Maya Neytcheva}, title = {On the Numerical Solution of State- and Control-Constrained Optimal Control Problems}, institution = it, department = tdb, year = 2021, number = {2021-003}, month = apr, abstract = {We consider the iterative solution of algebraic systems, arising in optimal control problems, constrained by a partial differential equation, with additional box constraints on the state and the control variables, and sparsity imposed on the control. A nonsymmetric two-by-two block preconditioner is analysed and tested for a wide range of problem, regularization and discretization parameters. The constraint equation characterizes convection-diffusion processes.} } @TechReport{ it:2021-002, author = {Rafael Diaz Fuentes and Mariarosa Mazza and Stefano Serra-Capizzano}, title = {A {$\omega$}-Circulant Regularization for Linear Systems Arising in Interpolation with Subdivision Schemes}, institution = it, department = tdb, year = 2021, number = {2021-002}, month = mar, abstract = {In the curve interpolation with primal and dual form of stationary subdivision schemes, the computation of the relevant parameters amounts in solving special banded circulant linear systems, whose coefficients are related to quantities arising from the used stationary subdivision schemes. In some important cases it happens that the associated generating function, which is a special Laurent polynomial called symbol, has zeros on the unit complex circle of the form exp$(2\pi \i j/n)$, where $n$ is the size of the matrix, $\i^2=-1$, and $j$ is a non-negative integer bounded by $n-1$. 
When this situation occurs the discrete problem is ill-posed simply because the circulant coefficient matrix is singular and the problem has no solution (or infinitely many). Standard and nonstandard regularization techniques such as least square solutions or Tikhonov regularization have been tried, but the quality of the solution is not good enough. In this work we propose a structure preserving regularization in which the circulant matrix is replaced by the $\omega$-circulant counterpart, with $\omega$ being a complex parameter. A careful choice of $\omega$ close to $1$ (recall that the set of $1$-circulants coincides with standard circulant matrices) allows to solve satisfactorily the problem of the ill-posedness, even if the quality of the reconstruction is reasonable only in a restricted number of cases. Numerical experiments and further algorithmic proposals are presented and critically discussed. } } @TechReport{ it:2021-001, author = {Arve Gengelbach and Johannes {\AA}man Pohjola}, title = {Towards Correctly Checking for Cycles in Overloaded Definitions}, institution = it, department = csd, year = 2021, number = {2021-001}, month = mar, abstract = {Safe extension of a logic with definitional axioms needs acyclic definitions, because cycles in definitions possibly entail contradiction. In this report we give a mechanised exact characterisation of acyclic overloading definitions. 
Our results support the soundness argument of the cyclicity checker of the Isabelle/HOL theorem prover, and serve as the theoretical foundation for future verification of such a cyclicity checker.} } @TechReport{ it:2020-004, author = {Malin K{\"a}ll{\'e}n and Sverker Holmgren and Ebba Thora Hvannberg}, title = {Impact of Code Refactoring using Object-Oriented Methodology on a Scientific Computing Application}, institution = it, department = tdb, year = 2020, number = {2020-004}, month = oct, abstract = {The effect of refactoring on the quality of software has been extensively evaluated in scientific studies. We see a need to also consider its effect on performance. To this end, we have refactored the central parts of a code base developed in academia for a class of computationally demanding scientific computing problems. We made design choices based on the SOLID principles and we used object-oriented techniques in the implementation. We discuss the effect on maintainability qualitatively and also analyze it quantitatively. Not surprisingly, we find that maintainability increased as an effect of the refactoring. We also find that dynamic binding in the most frequently executed parts of the code makes the execution times increase significantly. By exploiting static polymorphism, we reduce the increase in execution times, and in some cases even get better performance than for the original code. We argue that the code version implementing static polymorphism is less maintainable than the one using dynamic polymorphism, although both versions are considerably more maintainable than the original code. Accordingly, we conclude that static polymorphism could be used to increase maintainability for performance critical code bases. 
Last, we argue that static polymorphism could even improve performance in some procedural code bases.} } @TechReport{ it:2020-003, author = {Sven-Erik Ekstr{\"o}m and Carlo Garoni and Adam Jozefiak and Jesse Perla}, title = {Eigenvalues and Eigenvectors of Tau Matrices with Applications to {M}arkov Processes and Economics}, institution = it, department = tdb, year = 2020, number = {2020-003}, month = aug, abstract = {In the context of matrix displacement decomposition, Bozzo and Di Fiore introduced the so-called $\tau_{\epsilon,\phi}$ algebra, a generalization of the more known $\tau$ algebra originally proposed by Bini and Capovani. We study the properties of eigenvalues and eigenvectors of the generator $T_{n,\epsilon,\phi}$ of the $\tau_{\epsilon,\phi}$ algebra. In particular, we derive the asymptotics for the outliers of $T_{n,\epsilon,\phi}$ and the associated eigenvectors; we obtain equations for the eigenvalues of $T_{n,\epsilon,\phi}$, which provide also the eigenvectors of $T_{n,\epsilon,\phi}$; and we compute the full eigendecomposition of $T_{n,\epsilon,\phi}$ in the specific case $\epsilon\phi=1$. We also present applications of our results in the context of queuing models, random walks, and diffusion processes, with a special attention to their implications in the study of wealth/income inequality and portfolio dynamics.} } @TechReport{ it:2020-002, author = {Owe Axelsson and Michal B{\'e}res and Radim Blaheta}, title = {A Boundary Optimal Control Identification Problem}, institution = it, department = tdb, year = 2020, number = {2020-002}, month = may, abstract = {Optimal control methods are applied in various problems and can be efficient also for solving inverse problems, such as parameter identification and boundary control which arise in many important applications. 
For boundary optimal control methods one can identify conditions on an inaccessible part of the boundary by letting them play the role of a control variable function and by overimposing boundary conditions at another part of the boundary of the given domain. The paper shows a mathematical formulation of the problem, the arising (regularized) Karush-Kuhn-Tucker (KKT) system and introduces a preconditioner for this system. The spectral analysis of the preconditioner and numerical tests with preconditioning are included.} } @TechReport{ it:2020-001, author = {{\AA}sa Cajander and Marta Larusdottir and Gustaf Hedstr{\"o}m}, title = {Digital arbetsmilj{\"o} och projektet N{\"a}ra v{\aa}rd online}, institution = it, year = 2020, number = {2020-001}, month = mar, abstract = {Denna rapport inneh{\aa}ller en kartl{\"a}ggning av 1177-personalens digitala arbetsmilj{\"o} vid arbete med den chattfunktion till patienter som anv{\"a}ndes under ett pilotprojekt h{\"o}sten 2019. Rapporten {\"a}r skriven med utg{\aa}ngspunkt fr{\aa}n att l{\"a}saren har inblick i personalen p{\aa} 1177s arbete, och har kunskap kring de IT-system som anv{\"a}nds. Kartl{\"a}ggningen genomf{\"o}rdes som ett samarbete mellan forskningsprojektet Systemutvecklingsmetoder f{\"o}r digital arbetsmilj{\"o} som leds av Uppsala Universitet och Region Uppsalas projekt N{\"a}ra V{\aa}rd Online under h{\"o}sten och vintern 2019/2020. Kontextuella intervjuer genomf{\"o}rdes p{\aa} plats under cirka fem timmar. Dessutom genomf{\"o}rdes nio semistrukturerade intervjuer med sjuksk{\"o}terskorna och {\aa}tta med l{\"a}kare. Intervjufr{\aa}gorna grupperades i fyra teman: intervjupersonens bakgrund; arbetsmilj{\"o} relaterat till pilotprojektet; arbetsmilj{\"o} relaterat till det {\"a}ldre s{\"a}ttet att arbeta och n{\aa}gra avslutande fr{\aa}gor. Alla intervjuer transkriberades och tematisk analys gjordes. 
Inom temat upplevelse av \textbf{krav} visar intervjuerna att det som {\"a}r mest stressande, och som n{\"a}stan stressar alla, {\"a}r k{\"o}bildningen i chattsystemet. Det som stressar {\"a}r otillr{\"a}cklig information om vilka samtal som v{\"a}ntar, och att man inte kan veta n{\"a}r personen man chattar med svarar. Dessutom p{\aa}pekar n{\aa}gra att man inte heller kan se vilka patienter som beh{\"o}ver snabb hj{\"a}lp och vilka som kan v{\"a}nta. Inom temat \textbf{st{\"o}d} kan man konstatera att alla upplever systemen som stabila, och har god inblick i vad man kan g{\"o}ra om systemen inte fungerar. M{\aa}nga tycker att systemen fungerar mycket bra. N{\aa}gra p{\aa}pekar att samarbetet och st{\"o}det fr{\aa}n kollegor som jobbar hemifr{\aa}n inte fungerar lika bra som att arbeta p{\aa} samma plats. Inom omr{\aa}det upplevelse av \textbf{kontroll} finns det f{\"o}rb{\"a}ttringsomr{\aa}den vad det g{\"a}ller vilka fr{\aa}gor som patienten svarat p{\aa}, och sjuksk{\"o}terskor p{\aa}pekade att systemet delvis {\"a}r designat f{\"o}r l{\"a}kare och inte f{\"o}r sjuksk{\"o}terskors arbete. M{\aa}nga upplevde att de kunde vara med och p{\aa}verka i f{\"o}r{\"a}ndringsarbetet, och att de i mycket stor utstr{\"a}ckning varit delaktig. Rapporten inneh{\aa}ller ocks{\aa} n{\aa}gra rekommendationer kring design och implementering av liknande tj{\"a}nster ur ett arbetsmilj{\"o}perspektiv. Slutligen finns l{\"a}stips f{\"o}r den intresserade l{\"a}saren. 
} } @TechReport{ it:2019-011, author = {Pietro Benedusi and Paola Ferrari and Carlo Garoni and Rolf Krause and Stefano Serra-Capizzano}, title = {Fast Parallel Solver for the Space-Time {IgA-DG} Discretization of the Anisotropic Diffusion Equation}, institution = it, department = tdb, year = 2019, number = {2019-011}, month = nov, abstract = {We consider the space-time discretization of the (linear) anisotropic diffusion equation, using an isogeometric analysis (IgA) approximation in space and a discontinuous Galerkin (DG) approximation in time. Drawing inspiration from a former spectral analysis, we propose for the resulting space-time linear system a new solution method combining a suitable preconditioned GMRES (PGMRES) algorithm with a few iterations of an appropriate multigrid method. The performance of our new solution method is illustrated through numerical experiments, which show its competitiveness in terms of robustness, run-time and parallel scaling.} } @TechReport{ it:2019-010, author = {Umberto Soverini and Torsten S{\"o}derstr{\"o}m}, title = {The {F}risch Scheme: Time and Frequency Domain Aspects}, institution = it, department = syscon, year = 2019, number = {2019-010}, month = nov, abstract = {Several estimation methods have been proposed for identifying errors-in-variables systems, where both input and output measurements are corrupted by noise. One of the more interesting approaches is the Frisch scheme. The method can be applied using either time or frequency domain representations. 
This paper investigates the general mathematical and geometrical aspects of the Frisch scheme, illustrating the analogies and the differences between the time and frequency domain formulations.} } @TechReport{ it:2019-009, author = {Kristiina Ausmees}, title = {Evaluation of Methods Handling Missing Data in {PCA} on Genotype Data: Applications for Ancient {DNA}}, institution = it, department = tdb, year = 2019, number = {2019-009}, month = oct, abstract = {Principal Component Analysis (PCA) is a method of projecting data onto a basis that maximizes its variance, possibly revealing previously unseen patterns or features. PCA can be used to reduce the dimensionality of multivariate data, and is widely applied in visualization of genetic information. In the field of ancient DNA, it is common to use PCA to show genetic affinities of ancient samples in the context of modern variation. Due to the low quality and sequence coverage often exhibited by ancient samples, such analysis is not straightforward, particularly when performing joint visualization of multiple individuals with non-overlapping sequence data. The PCA transform is based on variances of allele frequencies among pairs of individuals, and discrepancies in overlap may therefore have large effects on scores. As the relative distances between scores are used to infer genetic similarity, it is important to distinguish between the effects of the particular set of markers used and actual genetic affinities. This work addresses the problem of using an existing PCA model to estimate scores of new observations with missing data. We address the particular application of visualizing genotype data, and evaluate approaches commonly used in population genetic analyses as well as other methods from the literature. 
The methods considered are that of trimmed scores, projection to the model plane, performing PCA individually on samples and subsequently merging them using Procrustes transformation, as well as the two least-squares based methods trimmed score regression and known data regression. Using empirical ancient data, we demonstrate the use of the different methods, and show that discrepancies in the set of loci considered for different samples can have pronounced effects on estimated scores. We also present an evaluation of the methods based on modern data with varying levels of simulated sparsity, concluding that their relative performance is highly data-dependent.} } @TechReport{ it:2019-008, author = {Kristiina Ausmees and Federico Sanchez-Quinto and Mattias Jakobsson and Carl Nettelblad}, title = {An Empirical Evaluation of Genotype Imputation of Ancient {DNA}}, institution = it, department = tdb, year = 2019, number = {2019-008}, month = oct, abstract = {With capabilities of sequencing ancient DNA to high coverage often limited by sample quality or cost, imputation of missing genotypes presents a possibility to increase power of inference as well as cost-effectiveness in analysis of ancient data. However, the high degree of uncertainty often associated with ancient DNA poses several methodological challenges, and performance of imputation methods in this context has not been fully explored. To gain further insights, we performed a systematic evaluation of imputation of ancient data using BEAGLE 4.0 and reference data from phase 3 of the 1000 Genomes project, investigating the effects of coverage, phased reference and study sample size. Making use of five ancient samples with high-coverage data available, we evaluated imputed data with respect to accuracy, reference bias and genetic affinities as captured by PCA. We obtained genotype concordance levels of over 99\% for data with 1x coverage, and similar levels of accuracy and reference bias at levels as low as 0.75x. 
Our findings suggest that using imputed data can be a realistic option for various population genetic analyses even for data in coverage ranges below 1x. We also show that a large and varied phased reference set as well as the inclusion of low- to moderate-coverage ancient samples can increase imputation performance, particularly for rare alleles. In-depth analysis of imputed data with respect to genetic variants and allele frequencies gave further insight into the nature of errors arising during imputation, and can provide practical guidelines for post-processing and validation prior to downstream analysis.} } @TechReport{ it:2019-007, author = {Malin K{\"a}ll{\'e}n and Tobias Wrigstad}, title = {Performance of an {OO} Compute Kernel on the {JVM}: Revisiting {J}ava as a Language for Scientific Computing Applications ({E}xtended Version)}, institution = it, year = {2019}, number = {2019-007}, month = sep, abstract = {The study of Java as a programming language for scientific computing is warranted by simpler, more extensible and more easily maintainable code. Previous work on refactoring a C++ scientific computing code base to follow best practises of object-oriented software development revealed a coupling of such practises and considerable slowdowns due to indirections introduced by abstractions. In this paper, we explore how Java's JIT compiler handles such abstraction-induced indirection using a typical scientific computing compute kernel extracted from a linear solver written in C++. We find that the computation times for large workloads on one machine can be on par for C++ and Java. However, for distributed computations, a better parallelisation strategy needs to be found for non-blocking communication. 
We also report on the impact on performance for common ``gripes'': garbage collection, array bounds checking, and dynamic binding.} } @TechReport{ it:2019-006, author = {Umberto Soverini and Torsten S{\"o}derstr{\"o}m}, title = {Frequency Domain Identification of {FIR} Models from Noisy Input-Output Data}, institution = it, department = syscon, year = 2019, number = {2019-006}, month = aug, abstract = {This paper describes a new approach for identifying FIR models from a finite number of measurements, in the presence of additive and uncorrelated white noise. In particular, two different frequency domain algorithms are proposed. The first algorithm is based on some theoretical results concerning the dynamic Frisch scheme. The second algorithm maps the FIR identification problem into a quadratic eigenvalue problem. Both methods resemble in many aspects some other identification algorithms, originally developed in the time domain. The features of the proposed methods are compared with each other and with those of some time domain algorithms by means of Monte Carlo simulations.} } @TechReport{ it:2019-005, author = {Giovanni Barbarino and Carlo Garoni and Stefano Serra-Capizzano}, title = {Block Generalized Locally {T}oeplitz Sequences: Theory and Applications in the Multidimensional Case}, institution = it, department = tdb, year = 2019, number = {2019-005}, month = jul, abstract = {In computational mathematics, when dealing with a large linear discrete problem (e.g., a linear system) arising from the numerical discretization of a partial differential equation (PDE), the knowledge of the spectral distribution of the associated matrix has proved to be a useful information for designing/analyzing appropriate solvers---especially, preconditioned Krylov and multigrid solvers---for the considered problem. 
Actually, this spectral information is of interest also in itself as long as the eigenvalues of the aforementioned matrix represent physical quantities of interest, which is the case for several problems from engineering and applied sciences (e.g., the study of natural vibration frequencies in an elastic material). The theory of multilevel generalized locally Toeplitz (GLT) sequences is a powerful apparatus for computing the asymptotic spectral distribution of matrices $A_n$ arising from virtually any kind of numerical discretization of PDEs. Indeed, when the mesh-fineness parameter $n$ tends to infinity, these matrices $A_n$ give rise to a sequence $\{A_n\}_n$, which often turns out to be a multilevel GLT sequence or one of its ``relatives'', i.e., a multilevel block GLT sequence or a (multilevel) reduced GLT sequence. In particular, multilevel block GLT sequences are encountered in the discretization of systems of PDEs as well as in the higher-order finite element or discontinuous Galerkin approximation of scalar/vectorial PDEs. In this work, we systematically develop the theory of multilevel block GLT sequences as an extension of the theories of (unilevel) GLT sequences \cite{GLT-bookI}, multilevel GLT sequences \cite{GLT-bookII}, and block GLT sequences \cite{bg}. 
We also present several emblematic applications of this theory in the context of PDE discretizations.} } @TechReport{ it:2019-004, author = {Giovanni Barbarino and Carlo Garoni and Stefano Serra-Capizzano}, title = {Block Generalized Locally {T}oeplitz Sequences: Theory and Applications in the Unidimensional Case}, institution = it, department = tdb, year = 2019, number = {2019-004}, month = jul, abstract = {In computational mathematics, when dealing with a large linear discrete problem (e.g., a linear system) arising from the numerical discretization of a differential equation (DE), the knowledge of the spectral distribution of the associated matrix has proved to be a useful information for designing/analyzing appropriate solvers---especially, preconditioned Krylov and multigrid solvers---for the considered problem. Actually, this spectral information is of interest also in itself as long as the eigenvalues of the aforementioned matrix represent physical quantities of interest, which is the case for several problems from engineering and applied sciences (e.g., the study of natural vibration frequencies in an elastic material). The theory of generalized locally Toeplitz (GLT) sequences is a powerful apparatus for computing the asymptotic spectral distribution of matrices $A_n$ arising from virtually any kind of numerical discretization of DEs. Indeed, when the mesh-fineness parameter $n$ tends to infinity, these matrices $A_n$ give rise to a sequence $\{A_n\}_n$, which often turns out to be a GLT sequence or one of its ``relatives'', i.e., a block GLT sequence or a reduced GLT sequence. In particular, block GLT sequences are encountered in the discretization of systems of DEs as well as in the higher-order finite element or discontinuous Galerkin approximation of scalar/vectorial DEs. This work is a review, refinement, extension, and systematic exposition of the theory of block GLT sequences. 
It also includes several emblematic applications of this theory in the context of DE discretizations.} } @TechReport{ it:2019-003, author = {Ricardo Alves and Stefanos Kaxiras and David Black-Schaffer}, title = {Minimizing Replay under Way-Prediction}, institution = it, department = docs, year = 2019, number = {2019-003}, month = may, note = {This paper is an extension of another paper published in ICCD 2018 ``Dynamically Disabling Way-prediction to Reduce Instruction Replay''.}, abstract = {Way-predictors are effective at reducing dynamic cache energy by reducing the number of ways accessed, but introduce additional latency for incorrect way-predictions. While previous work has studied the impact of the increased latency for incorrect way-predictions, we show that the \textit{latency variability} has a far greater effect as it forces replay of in-flight instructions on an incorrect way-prediction. To address the problem, we propose a solution that learns the confidence of the way-prediction and dynamically disables it when it is likely to mispredict. We further improve this approach by biasing the confidence to reduce latency variability further at the cost of reduced way-predictions. Our results show that instruction replay in a way-predictor reduces IPC by 6.9\% due to 10\% of the instructions being replayed. Our confidence-based way-predictor degrades IPC by only 2.9\% by replaying just 3.4\% of the instructions, reducing way-predictor cache energy overhead (compared to serial access cache) from 8.5\% to 1.9\%.} } @TechReport{ it:2019-002, author = {C. Garoni and S. 
Serra-Capizzano}, title = {Block Generalized Locally {T}oeplitz Sequences: Theory and Applications}, institution = it, department = tdb, year = 2019, number = {2019-002}, month = apr, abstract = {When dealing with a large linear system arising from the numerical discretization of a differential equation (DE), the knowledge of the spectral distribution of the associated matrix has proved to be a useful information for designing/analyzing appropriate solvers---especially, preconditioned Krylov and multigrid solvers---for the considered system. The theory of generalized locally Toeplitz (GLT) sequences is a powerful apparatus for computing the asymptotic spectral distribution of matrices $A_n$ arising from virtually any kind of numerical discretization of DEs. Indeed, when the mesh-fineness parameter $n$ tends to infinity, these matrices $A_n$ give rise to a sequence $\{A_n\}_n$, which often turns out to be a GLT sequence or one of its ``relatives'', i.e., a block GLT sequence or a reduced GLT sequence. In particular, block GLT sequences are encountered in the discretization of systems of DEs as well as in the higher-order finite element or discontinuous Galerkin approximation of scalar/vectorial DEs. This work is a review, refinement, extension, and systematic exposition of the theory of block GLT sequences. It also includes several emblematic applications of this theory in the context of DE discretizations.} } @TechReport{ it:2018-014, author = {Nikos Nikoleris and Erik Hagersten and Trevor E. Carlson}, title = {Delorean: Virtualized Directed Profiling for Cache Modeling in Sampled Simulation}, institution = it, department = docs, year = 2018, number = {2018-014}, month = dec, abstract = {Current practice for accurate and efficient simulation (e.g., SMARTS and Simpoint) makes use of sampling to significantly reduce the time needed to evaluate new research ideas. 
By evaluating a small but representative portion of the original application, sampling can allow for both fast and accurate performance analysis. However, as cache sizes of modern architectures grow, simulation time is dominated by warming microarchitectural state and not by detailed simulation, reducing overall simulation efficiency. While checkpoints can significantly reduce cache warming, improving efficiency, they limit the flexibility of the system under evaluation, requiring new checkpoints for software updates (such as changes to the compiler and compiler flags) and many types of hardware modifications. An ideal solution would allow for accurate cache modeling for each simulation run without the need to generate rigid checkpointing data a priori. Enabling this new direction for fast and flexible simulation requires a combination of (1) a methodology that allows for hardware and software flexibility and (2) the ability to quickly and accurately model arbitrarily-sized caches. Current approaches that rely on checkpointing or statistical cache modeling require rigid, up-front state to be collected which needs to be amortized over a large number of simulation runs. These earlier methodologies are insufficient for our goals for improved flexibility. In contrast, our proposed methodology, Delorean, outlines a unique solution to this problem. The Delorean simulation methodology enables both flexibility and accuracy by quickly generating a targeted cache model for the next detailed region on the fly without the need for up-front simulation or modeling. More specifically, we propose a new, more accurate statistical cache modeling method that takes advantage of hardware virtualization to precisely determine the memory regions accessed and to minimize the time needed for data collection while maintaining accuracy. Delorean uses a multi-pass approach to understand the memory regions accessed by the next, upcoming detailed region. 
Our methodology collects the entire set of key memory accesses and, through fast virtualization techniques, progressively scans larger, earlier regions to learn more about these key accesses in an efficient way. Using these techniques, we demonstrate that Delorean allows for the fast evaluation of systems and their software through the generation of accurate cache models on the fly. Delorean outperforms previous proposals by an order of magnitude, with a simulation speed of 150 MIPS and a similar average CPI error (below 4\%).} } @TechReport{ it:2018-013, author = {Stanislav Morozov and Stefano Serra-Capizzano and Eugene Tyrtyshnikov}, title = {How to Extend the Application Scope of {GLT}-Sequences}, institution = it, department = tdb, year = 2018, number = {2018-013}, month = nov, abstract = {In this paper we address the problem of finding the distribution of eigenvalues and singular values for matrix sequences. The main focus of this paper is the spectral distribution for matrix sequences arising in discretization of PDE. In the last two decades the theory of GLT-sequences aimed at this problem has been developed. We investigate the possibility of application of GLT-theory to discretization of PDE on non-rectangular domains and show that in many cases the present GLT-theory is insufficient. 
We also propose a generalization of GLT-sequences that enables one to cope with a wide range of PDE discretization problems defined on polygonal domains.} } @TechReport{ it:2018-012, author = {Sven-Erik Ekstr{\"o}m and Stefano Serra-Capizzano}, title = {Eigenvalue Isogeometric Approximations Based on {B}-splines: Tools and Results}, institution = it, department = tdb, year = 2018, number = {2018-012}, month = jul, abstract = {In this short note we consider the spectral analysis of large matrices coming from the numerical approximation of the eigenvalue problem \[ -(a(x)u'(x))'=\lambda b(x) u(x),\quad\quad x\in (0,1), \] where $u(0)$ and $u(1)$ are given, by using isogeometric methods based on B-splines. We give precise estimates for the extremal eigenvalues and global distributional results. The techniques involve dyadic decomposition arguments, the GLT analysis, and basic extrapolation methods.} } @TechReport{ it:2018-011, author = {Tatiana Chistiakova and Per Mattsson and Bengt Carlsson and Torbj{\"o}rn Wigren}, title = {Nonlinear System Identification of the Dissolved Oxygen to Effluent Ammonium Dynamics in an Activated Sludge Process}, institution = it, department = syscon, year = 2018, number = {2018-011}, month = jun, abstract = {Aeration of biological reactors in wastewater treatment plants is important to obtain a high removal of soluble organic matter as well as for nitrification but requires a significant use of energy. It is hence of importance to control the aeration rate, for example, by ammonium feedback control. The goal of this report is to model the dynamics from the set point of an existing dissolved oxygen controller to effluent ammonium using two types of system identification methods for a Hammerstein model, including a newly developed recursive variant. The models are estimated and evaluated using noise corrupted data from a complex mechanistic model (Activated Sludge Model no.1). 
The performances of the estimated nonlinear models are compared with an estimated linear model and it is shown that the nonlinear models give a significantly better fit to the data. The resulting models may be used for adaptive control (using the recursive Hammerstein variant), gain-scheduling control, L2 stability analysis, and model based fault detection.} } @TechReport{ it:2018-010, author = {Owe Axelsson and Maya Neytcheva}, title = {Preconditioners for Two-by-Two Block Matrices with Square Blocks}, institution = it, department = tdb, year = 2018, number = {2018-010}, month = may, abstract = {Two-by-two block matrices with square blocks arise in the numerical treatment of numerous applications of practical significance, such as optimal control problems, constrained by a state equation in the form of partial differential equations, multiphase models, solving complex linear systems in real arithmetics, to name a few. Such problems lead to algebraic systems of equations with matrices of a certain two-by-two block form. For such matrices, a number of preconditioners has been proposed, some of them with tight eigenvalue bounds. In this paper it is shown that in particular one of them, referred to as PRESB, is very efficient, not only giving robust, favourable properties of the spectrum but also enabling an efficient implementation with low computational complexity. Various applications and generalizations of this preconditioning technique, such as in time-harmonic parabolic and Stokes equations, eddy current electromagnetic problems and problems with additional box-constraints, i.e. upper and/or lower bounds of the solution, are also discussed. The method is based on the use of coupled inner-outer iterations, where the inner iteration can be performed to various relative accuracies. This leads to variable preconditioners, thus, a flexible version of a Krylov subspace iteration method must be used. 
Alternatively, some version of a defect-correction iterative method can be applied. } } @TechReport{ it:2018-009, author = {Carlo Garoni and Mariarosa Mazza and Stefano Serra-Capizzano}, title = {Block Generalized Locally Toeplitz Sequences: From the Theory to the Applications}, institution = it, department = tdb, year = 2018, number = {2018-009}, month = may, abstract = {The theory of generalized locally Toeplitz (GLT) sequences is a powerful apparatus for computing the asymptotic spectral distribution of matrices $A_n$ arising from virtually any kind of numerical discretization of differential equations (DEs). Indeed, when the mesh fineness parameter $n$ tends to infinity, these matrices $A_n$ give rise to a sequence $\{A_n\}_n$, which often turns out to be a GLT sequence or one of its ``relatives'', i.e., a block GLT sequence or a reduced GLT sequence. In particular, block GLT sequences are encountered in the discretization of systems of DEs as well as in the higher-order finite element or discontinuous Galerkin approximation of scalar DEs. Despite the applicative interest, a solid theory of block GLT sequences has been developed only recently, in 2018. The purpose of the present paper is to illustrate the potential of this theory by presenting a few noteworthy examples of applications in the context of DE discretizations.} } @TechReport{ it:2018-008, author = {Owe Axelsson and Maya Neytcheva and Anders Str{\"o}m}, title = {An Efficient Preconditioning Method for State Box-Constrained Optimal Control Problems}, institution = it, department = tdb, year = 2018, number = {2018-008}, month = mar, note = {This is a major revision of Technical Report 2017-004. 
In the new version all the numerical experiments have been rerun with new much more efficient dynamic stopping criteria.}, abstract = {An efficient preconditioning technique used earlier for two-by-two block matrix systems with square matrix blocks is shown to be applicable also for a state variable box-constrained optimal control problem. The problem is penalized by a standard regularization term for the control variable and for the box-constraint, using a Moreau-Yosida penalization method. It is shown that there occur very few nonlinear iteration steps and also few iterations to solve the arising linearized equations on the fine mesh. This holds for a wide range of the penalization and discretization parameters. The arising nonlinearity can be handled with a hybrid nonlinear-linear procedure that raises the computational efficiency of the overall solution method.} } @TechReport{ it:2018-007, author = {Umberto Soverini and Torsten S{\"o}derstr{\"o}m}, title = {Identification of Two Dimensional Complex Sinusoids in White Noise: a State-Space Frequency Approach}, institution = it, department = syscon, year = 2018, number = {2018-007}, month = apr, note = {Updated version of Technical Report 2017-021.}, abstract = {This paper proposes a new frequency domain approach for identifying the parameters of two-dimensional complex sinusoids from a finite number of data, when the measurements are affected by additive and uncorrelated two-dimensional white noise. The new method extends in two dimensions a frequency identification procedure of complex sinusoids, originally developed for the one-dimensional case. The properties of the proposed method are analyzed by means of Monte Carlo simulations and its features are compared with those of other estimation algorithms. In particular the practical advantage of the method is highlighted. In fact the novel approach can operate just on a specified sub-area of the 2D spectrum. 
This area-selective feature allows a drastic reduction of the computational complexity, which is usually very high when standard time domain methods are used.} } @TechReport{ it:2018-006, author = {Umberto Soverini and Torsten S{\"o}derstr{\"o}m}, title = {2D-Frequency Domain Identification of Complex Sinusoids in the Presence of Additive Noise}, institution = it, department = syscon, year = 2018, number = {2018-006}, month = apr, note = {Updated version of Technical Report 2017-020.}, abstract = {This paper describes a new approach for identifying the parameters of two-dimensional complex sinusoids from a finite number of measurements, in the presence of additive and uncorrelated two-dimensional white noise. The proposed approach is based on using frequency domain data. The new method extends to the two-dimensional (2D) case some recent results obtained with reference to the frequency ESPRIT algorithm. The properties of the proposed method are analyzed by means of Monte Carlo simulations and its features are compared with those of a classical time domain estimation algorithm. The practical advantages of the method are highlighted. In fact the novel approach can operate just on a specified sub-area of the 2D spectrum. This area-selective feature allows a drastic reduction of the computational complexity, which is usually very high when standard time domain methods are used.} } @TechReport{ it:2018-005, author = {Sven-Erik Ekstr{\"o}m and Isabella Furci and Stefano Serra-Capizzano}, title = {Exact Formulae and Matrix-Less Eigensolvers for Block Banded Symmetric {T}oeplitz Matrices}, institution = it, department = tdb, year = 2018, number = {2018-005}, month = mar, abstract = {Precise asymptotic expansions for the eigenvalues of a Toeplitz matrix $T_n(f)$, as the matrix size $n$ tends to infinity, have recently been obtained, under suitable assumptions on the associated generating function $f$. A restriction is that $f$ has to be polynomial, monotone, and scalar-valued. 
In this paper we focus on the case where $\mathbf{f}$ is an $s\times s$ matrix-valued trigonometric polynomial with $s\ge 1$, and $T_n(\mathbf{f})$ is the block Toeplitz matrix generated by $\mathbf{f}$, whose size is $N(n,s)=sn$. The case $s=1$ corresponds to that already treated in the literature. We numerically derive conditions which ensure the existence of an asymptotic expansion for the eigenvalues. Such conditions generalize those known for the scalar-valued setting. Furthermore, following a proposal in the scalar-valued case by the first author, Garoni, and the third author, we devise an extrapolation algorithm for computing the eigenvalues of banded symmetric block Toeplitz matrices with a high level of accuracy and a low computational cost. The resulting algorithm is an eigensolver that does not need to store the original matrix, does not need to perform matrix-vector products, and for this reason is called {\em matrix-less}. We use the asymptotic expansion for the efficient computation of the spectrum of special block Toeplitz structures and we provide exact formulae for the eigenvalues of the matrices coming from the $\mathbb{Q}_p$ Lagrangian Finite Element approximation of a second order elliptic differential problem. Numerical results are presented and critically discussed.} } @TechReport{ it:2018-004, author = {Giovanni Barbarino and Stefano Serra-Capizzano}, title = {Non-{H}ermitian Perturbations of {H}ermitian Matrix-Sequences and Applications to the Spectral Analysis of Approximated {PDE}s}, institution = it, department = tdb, year = 2018, number = {2018-004}, month = feb, abstract = {This paper concerns the spectral analysis of matrix-sequences which can be written as a non-Hermitian perturbation of a given Hermitian matrix-sequence. The main result reads as follows. 
Suppose that $X_n$ is a Hermitian matrix of size $n$ and that $\{X_n\}_n\sim_{\lambda} f$, i.e., the matrix-sequence $\{X_n\}_n$ enjoys an asymptotic spectral distribution, in the Weyl sense, described by a Lebesgue measurable function $f$; if $\|Y_n\|_2 = o(\sqrt n)$ with $\|\cdot\|_2$ being the Schatten 2 norm, then $\{X_n+Y_n\}_n\sim_{\lambda} f$. In a previous paper by Leonid Golinskii and the second author a similar result was proved, but under the technical restrictive assumption that the involved matrix-sequences $\{ X_n\}_n$ and $\{ Y_n\}_n$ are uniformly bounded in spectral norm. Nevertheless, the result had a remarkable impact in the analysis of both spectral distribution and clustering of matrix-sequences arising from various applications, including the numerical approximation of partial differential equations (PDEs) and the preconditioning of PDE discretization matrices. The new result considerably extends the spectral analysis tools provided by the former one, and in fact we are now allowed to analyse variable-coefficient PDEs with unbounded coefficients, preconditioned matrix-sequences, etc. A few selected applications are considered, extensive numerical experiments are discussed, and a further conjecture is illustrated at the end of the paper.} } @TechReport{ it:2018-003, author = {Jonatan Lind{\'e}n and Bengt Jonsson}, title = {A Skiplist-Based Concurrent Priority Queue with Minimal Memory Contention}, institution = it, department = docs, year = 2018, number = {2018-003}, month = feb, note = {Revised and corrected version of Technical Report 2013-025.}, abstract = {Priority queues are fundamental to many multiprocessor applications. Several priority queue algorithms based on skiplists have been proposed, as skiplists allow concurrent accesses to different parts of the data structure in a simple way. However, for priority queues on multiprocessors, an inherent bottleneck is the operation that deletes the minimal element. 
We present a linearizable, lock-free, concurrent priority queue algorithm, based on skiplists, which minimizes the contention for shared memory that is caused by the \textsc{DeleteMin} operation. The main idea is to minimize the number of global updates to shared memory that are performed in one \textsc{DeleteMin}. In comparison with other skiplist-based priority queue algorithms, our algorithm achieves a 30 - 80\% improvement.} } @TechReport{ it:2018-002, author = {Pietro Benedusi and Carlo Garoni and Rolf Krause and Xiaozhou Li and Stefano Serra-Capizzano}, title = {Discontinuous {G}alerkin Discretization of the Heat Equation in Any Dimension: the Spectral Symbol}, institution = it, department = tdb, year = 2018, number = {2018-002}, month = jan, abstract = {The multidimensional heat equation, along with its more general version involving variable diffusion coefficients, is discretized by a discontinuous Galerkin (DG) method in time and a finite element (FE) method of arbitrary regularity in space. We show that the resulting space-time discretization matrices enjoy an asymptotic spectral distribution as the mesh fineness increases, and we determine the associated spectral symbol, i.e., the function that carefully describes the spectral distribution. The analysis of this paper is carried out in a stepwise fashion, without omitting details, and it is supported by several numerical experiments. 
It is preparatory to the development of specialized solvers for linear systems arising from the DG/FE approximation of the heat equation in the case of both constant and variable diffusion coefficients.} } @TechReport{ it:2018-001, author = {Carlo Garoni and Stefano Serra-Capizzano and Debora Sesana}, title = {The Theory of Block Generalized Locally Toeplitz Sequences}, institution = it, department = tdb, year = 2018, number = {2018-001}, month = jan, note = {Updated 2018-01-30.}, abstract = {The theory of generalized locally Toeplitz (GLT) sequences is a powerful apparatus for computing the asymptotic singular value and eigenvalue distribution of matrices $A_n$ arising from virtually any kind of numerical discretization of differential equations (DEs). Indeed, when the discretization parameter $n$ tends to infinity, these matrices $A_n$ give rise to a sequence $\{A_n\}_n$, which often turns out to be a GLT sequence or one of its `relatives', i.e., a block GLT sequence or a reduced GLT sequence. In particular, block GLT sequences are encountered in the discretization of systems of DEs as well as in the higher-order finite element or discontinuous Galerkin approximation of scalar DEs. Despite the applicative importance, a solid theory of block GLT sequences is still missing. The purpose of the present paper is to develop this theory in a systematic way.} } @TechReport{ it:2017-024, title = {Coupled Electric Drives Data Set and Reference Models}, author = {Torbj{\"o}rn Wigren and Maarten Schoukens}, institution = it, department = syscon, year = 2017, number = {2017-024}, month = nov, note = {The data set can be downloaded from \url{http://www.it.uu.se/research/publications/reports/2017-024/CoupledElectricDrivesDataSetAndReferenceModels.zip}.} , abstract = {The following report provides a description of the CE8 coupled electric drives laboratory process. A first set of continuous time model structures that are suitable to describe the nonlinear dynamics are presented. 
The data sets, which are available in .mat and .csv file formats, are then described in detail. The available data sets are short, which constitute a challenge when performing identification. In support of future work, Wiener models are identified with a recursive algorithm that is parameterized in continuous time. This approach reduces the number of parameters to four for identification of third order dynamics.} } @TechReport{ it:2017-023, author = {Kim-Anh Tran and Alexandra Jimborean and Trevor E. Carlson and Magnus Sj{\"a}lander and Konstantinos Koukos and Stefanos Kaxiras}, title = {Transcending Hardware Limits with Software Out-Of-Order Execution}, institution = it, department = docs, year = 2017, number = {2017-023}, month = oct, abstract = {Reducing the widening gap between processor and memory speed has been steering processors' design over the last decade, as memory accesses became the main performance bottleneck. Out-of-order architectures attempt to hide memory latency by dynamically reordering instructions, while in-order architectures are restricted to static instruction schedules. We propose a software-hardware co-design to break out of the hardware limits of existing architectures and attain increased memory and instruction level parallelism by orchestrating coarse-grain out-of-program-order execution in software (SWOOP). On in-order architectures, SWOOP acts as a virtual reorder buffer (ROB) while out-of-order architectures are endowed with the ability to jump ahead to independent code, far beyond the reach of the ROB. We build upon the decoupled access-execute model, however, executed in a single superscalar pipeline and within a single thread of control. The compiler generates the Access and Execute code slices and orchestrates their execution out-of-order, with the support of frugal microarchitectural enhancements to maximize efficiency. 
SWOOP significantly improves the performance of memory-bound applications by 42\% on in-order cores, and by 43\% on out-of-order architectures. Furthermore, not only is SWOOP competitive with out-of-order cores which benefit from double-sized reorder buffers, but it is also considerably more energy efficient. } } @TechReport{ it:2017-022, author = {Ali Dorostkar}, title = {Function-Based Algebraic Multigrid Method for the {3D} {P}oisson Problem on Structured Meshes}, institution = it, department = tdb, year = 2017, number = {2017-022}, month = oct, abstract = {Multilevel methods, such as Geometric and Algebraic Multigrid, Algebraic Multilevel Iteration, Domain Decomposition-type methods have been shown to be the methods of choice for solving linear systems of equations, arising in many areas of Scientific Computing. The methods, in particular the multigrid methods, have been efficiently implemented in serial and parallel and are available via many scientific libraries. The multigrid methods are primarily used as preconditioners for various Krylov subspace iteration methods. They exhibit convergence that is independent or nearly independent on the number of degrees of freedom and can be tuned to be also robust with respect to other problem parameters. The methods also possess optimal computational complexity. As a drawback, of particular importance when solving very large scale problems, we point out their high demand for computer memory. Since the methods utilize hierarchical structures, where the amount of computations decreases and that of communication increases, their parallel implementation might exhibit lesser scalability. Further, as the implementation usually relies on sparse matrix-vector multiplications, this may also decrease parallel performance for very large problems. 
In this work we utilize a different framework to construct multigrid methods, based on an analytical function representation of the matrix, that may keep the amount of computation high and local and may reduce significantly the memory requirements. The approach is particularly suitable for modern computer architectures. An implementation of the latter for the three-dimensional discrete Laplace operator is derived and implemented. The same function representation technology is used to construct smoothers of approximate inverse type.}, note = {Updated 2017-10-26.} } @TechReport{ it:2017-021, author = {Umberto Soverini and Torsten S{\"o}derstr{\"o}m}, title = {Identification of Two Dimensional Complex Sinusoids in White Noise: a State-Space Frequency Approach}, institution = it, department = syscon, year = 2017, number = {2017-021}, month = oct, note = {Updated by Technical Report 2018-007, April 2018. See \url{http://www.it.uu.se/research/publications/reports/2018-007}.} , abstract = {This paper proposes a new frequency domain approach for identifying the parameters of two–dimensional complex sinusoids from a finite number of data, when the measurements are affected by additive and uncorrelated two–dimensional white noise. The new method extends in two dimensions a frequency identification procedure of complex sinusoids, originally developed for the one–dimensional case. The properties of the proposed method are analyzed by means of Monte Carlo simulations and its features are compared with those of other estimation algorithms. In particular the practical advantage of the method will be highlighted. In fact the novel approach can operate just on a specified sub–area of the 2D spectrum. 
This area-selective feature allows a drastic reduction of the computational complexity, which is usually very high when standard time domain methods are used.} } @TechReport{ it:2017-020, author = {Umberto Soverini and Torsten S{\"o}derstr{\"o}m}, title = {2D-Frequency Domain Identification of Complex Sinusoids in the Presence of Additive Noise}, institution = it, department = syscon, year = 2017, number = {2017-020}, month = oct, note = {Updated by Technical Report 2018-006, April 2018. See \url{http://www.it.uu.se/research/publications/reports/2018-006}.} , abstract = {This paper describes a new approach for identifying the parameters of two-dimensional complex sinusoids from a finite number of measurements, in the presence of additive and uncorrelated two-dimensional white noise. The proposed approach is based on using frequency domain data. As a major feature, it enables the estimation to be frequency selective. The new method extends to the two-dimensional (2D) case some recent results obtained with reference to the frequency ESPRIT algorithm. The properties of the proposed method are analyzed by means of Monte Carlo simulations and its features are compared with those of a classical time domain estimation algorithm. The practical advantages of the method are highlighted. In fact the novel approach can operate just on a specified sub-area of the 2D spectrum. This area-selective feature allows a drastic reduction of the computational complexity, which is usually very high when standard time domain methods are used.}
We study static drag reduction over a lubricant-infused surface by looking at an array of two-dimensional transverse grooves partially filled with a second immiscible fluid.
% Our analysis, which represents a first step toward a complete mathematical understanding of the ALIF method, relies on recent matrix-theoretic results about sampling matrices and, above all, on the theory of generalized locally Toeplitz sequences, which we extend in this paper. % Several numerical examples in support of the theoretical analysis are presented.} } @TechReport{ it:2017-017, author = {Owe Axelsson and Zhao-Zheng Liang and Maya Neytcheva}, title = {Parallel Solution Methods and Preconditioners for Evolution Equations}, institution = it, department = tdb, year = 2017, number = {2017-017}, month = sep, abstract = {The recent development of the high performance computer platforms shows a clear trend towards heterogeneity and hierarchy. In order to utilize the computational power particular attention must be paid to finding new algorithms or adjust existing ones so that they better match the HPC computer architecture. In this work we consider an alternative to classical time-stepping methods based on use of time-harmonic properties and discuss solution approaches that allow efficient utilization of modern HPC resources. The method in focus is based on a truncated Fourier expansion of the solution of an evolutionary problem. The analysis is done for linear equations and it is remarked on the possibility to use two- or multilevel methods for nonlinear problems, which can add to an even higher degree of parallelization. The arising block matrix system to be solved admits a two-by-two block form with square blocks, for which a very efficient preconditioner exists. It leads to tight eigenvalue bounds for the preconditioned matrix and, hence, to a very fast convergence of a preconditioned Krylov subspace or iterative refinement method. The analytical background is shown as well as some illustrating numerical examples. 
} } @TechReport{ it:2017-016, author = {Sven-Erik Ekstr{\"o}m and Isabella Furci and Stefano Serra-Capizzano}, title = {Are the Eigenvalues of the B-spline IgA Approximation of $-\Delta u = \lambda u$ Known in Almost Closed Form?}, institution = it, department = tdb, year = 2017, number = {2017-016}, month = aug, abstract = {In this paper we consider the B-spline IgA approximation of the second-order eigenvalue problem $-\Delta u = \lambda u$ on $\Omega=(0,1)^d$, with zero Dirichlet boundary conditions and with $\Delta = \sum_{j=1}^{d} \frac{\partial^2}{\partial x_j^2}$, $d\ge 1$. We use B-splines of degree ${\bf p}=(p_1,\ldots,p_d)$ and maximal smoothness and we consider the natural Galerkin approach. By using elementary tensor arguments, we show that the eigenvalue-eigenvector structure of the discrete problem can be reduced to the case of $d=1, p\ge 1$, regularity $C^{p-1}$, with coefficient matrix $L_n^{[p]}$ having size $N(n,p)=n+p-2$. In previous works, it has been established that the normalized sequence $\{n^{-2}L_n^{[p]}\}_n$ has a canonical distribution in the eigenvalue sense and the so-called spectral symbol $e_p(\theta)$ has been identified. In this paper we provide numerical evidence of a precise asymptotic expansion for the eigenvalues, which obviously begins with the function $e_p$, up to the largest $n_p^{\rm out}=p+{\rm mod}(p,2)-2$ eigenvalues which behave as outliers. 
More precisely, for every integer $\alpha\ge0$, every $n$, every $p\ge 3$ and every $j=1,\ldots,\hat N=N(n,p)-n_p^{\rm out}=n-{\rm mod}(p,2)$, the following asymptotic expansion holds: \begin{align*} \lambda_j(n^{-2} L_n^{[p]})=e_p(\theta_{j,n,p})+\sum_{k=1}^{\alpha}c_k^{(p)}(\theta_{j,n,p})h^k+E_{j,n,\alpha}^{(p)}, \end{align*} where: \begin{itemize} \item the eigenvalues of $n^{-2}L_n^{[p]}$ are arranged in nondecreasing order and $e_p$ is increasing; \item $\{c_k^{(p)}\}_{k=1,2,\ldots}$ is a sequence of functions from $[0,\pi]$ to $\mathbb R$ which depends only on $e_p$; \item for any $p\ge 3$ and $k$, there exists $\bar\theta(p,k)>0$ such that $c_k^{(p)}$ vanishes (at least numerically) on the whole nontrivial interval $[0,\bar\theta(p,k)]$, so that the formula is exact, up to machine precision, for a large portion of the small eigenvalues; \item $h=\frac{1}{n}$ and $\theta_{j,n,p}=\frac{j\pi}{n}=j\pi h$, $j=1,\ldots, n-{\rm mod}(p,2)$; \item $E_{j,n,\alpha}^{(p)}=O(h^{\alpha+1})$ is the remainder (the error), which satisfies the inequality $|E_{j,n,\alpha}^{(p)}|\le C_{\alpha} h^{\alpha+1}$ for some constant $C_{\alpha}$ depending only on $\alpha$ and $e_p$. \end{itemize} For the case $p=1,2$ the complete structure of the eigenvalues and eigenvectors is identified exactly. Indeed, for such values of $p$, the matrices $L_n^{[p]}$ belong to Toeplitz -minus- Hankel algebras and this is also the reason why there are no outliers, that is $n_p^{\rm out}=0$. Moreover, for $p\ge 3$ and based on the eigenvalue asymptotics for $n^{-2}L_n^{[p]}$, we devise an extrapolation algorithm for computing the eigenvalues of the discrete problem with a high level of accuracy and with a relatively negligible computational cost. 
However, the algorithm is not necessary for all the spectrum and indeed, for $p\ge 3$ and $\theta_{j,n,p}$ belonging to the interval $[0,\bar\theta(p)]$, $\bar\theta(p)=\inf_k \bar\theta(p,k)$, the value $e_p(\theta_{j,n,p})$ coincides with $\lambda_j(n^{-2} L_n^{[p]})$, up to machine precision. Such expansions are of the same type studied in the literature for the eigenvalues of a sequence of Toeplitz matrices $\{T_n(f)\}$ and of a sequence of preconditioned Toeplitz matrices $\{T_n^{-1}(g)T_n(f)\}$, for $f$ trigonometric polynomial, $g$ nonnegative, not identically zero trigonometric polynomial. Extensive numerical experiments are discussed and further future research steps are illustrated at the end of the paper. } } @TechReport{ it:2017-015, author = {Sven-Erik Ekstr{\"o}m and Carlo Garoni}, title = {An Interpolation-Extrapolation Algorithm for Computing the Eigenvalues of Preconditioned Banded Symmetric Toeplitz Matrices}, institution = it, department = tdb, year = 2017, number = {2017-015}, month = aug, abstract = {In the past few years, Bogoya, B{\"o}ttcher, Grudsky, and Maximenko obtained for the eigenvalues of a Toeplitz matrix $T_n(f)$, under suitable assumptions on the generating function $f$, the precise asymptotic expansion as the matrix size $n$ goes to infinity. On the basis of several numerical experiments, it was conjectured by Serra-Capizzano that a completely analogous expansion also holds for the eigenvalues of the preconditioned Toeplitz matrix $T_n(u)^{-1}T_n(v)$, provided $f=v/u$ is monotone and further conditions on $u$ and $v$ are satisfied. Based on this expansion, we here propose and analyze an interpolation--extrapolation algorithm for computing the eigenvalues of $T_n(u)^{-1}T_n(v)$. 
We illustrate the performance of the algorithm through numerical experiments and we also present its generalization to the case where $f=v/u$ is non-monotone.} } @TechReport{ it:2017-014, author = {Zhao-Zheng Liang and Owe Axelsson and Maya Neytcheva}, title = {A Robust Structured Preconditioner for Time-Harmonic Parabolic Optimal Control Problems}, institution = it, department = tdb, year = 2017, number = {2017-014}, month = aug, abstract = {We consider the iterative solution of optimal control problems constrained by the time-harmonic parabolic equations. Due to the time-harmonic property of the control equations, a suitable discretization of the corresponding optimality systems leads to a large complex linear system with special two-by-two block matrix of saddle point form. For this algebraic system, an efficient preconditioner is constructed, which results in a fast Krylov subspace solver, that is robust with respect to the mesh size, frequency and regularization parameters. Furthermore, the implementation is straightforward and the computational complexity is of optimal order, linear in the number of degrees of freedom. We show that the eigenvalue distribution of the corresponding preconditioned matrix leads to a condition number bounded above by 2. Numerical experiments confirming the theoretical derivations are presented, including comparisons with some other existing preconditioners.} } @TechReport{ it:2017-013, author = {Davide Bianchi and Stefano Serra-Capizzano}, title = {Spectral Analysis of Finite-Difference Approximations of $1-d$ Waves in Non-Uniform Grids}, institution = it, department = tdb, year = 2017, number = {2017-013}, month = jul, abstract = {Preserving the finite positive velocity of propagation of continuous solutions of wave equations is one of the key issues, when building numerical approximation schemes for control and inverse problems. And this is hard to achieve uniformly on all possible ranges of numerical solutions. 
In particular, high frequencies often generate spurious numerical solutions, behaving in a pathological manner and making the propagation properties of continuous solutions fail. The latter may lead to the divergence of the ``most natural" approximation procedures for numerical control or identification problems. On the other hand, the velocity of propagation of high frequency numerical wave-packets, the so-called group velocity, is well known to be related to the spectral gap of the corresponding discrete spectra. Furthermore most numerical schemes in uniform meshes fail to preserve the uniform gap property and, consequently, do not share the propagation properties of continuous waves. However, recently, S. Ervedoza, A. Marica and the E. Zuazua have shown that, in $1-d$, uniform propagation properties are ensured for finite-difference schemes in suitable non-uniform meshes behaving in a monotonic manner. The monotonicity of the mesh induces a preferred direction of propagation for the numerical waves. In this way, meshes that are suitably designed can ensure that all numerical waves reach the boundary in an uniform time, which is the key for the fulfillment of boundary controllability properties. In this paper we study the gap of discrete spectra of the Laplace operator in $1-d$ for non-uniform meshes, analysing the corresponding spectral symbol, which allows to show how to design the discretization grid for improving the gap behaviour. The main tool is the study of an univariate monotonic version of the spectral symbol, obtained by employing a proper rearrangement. The analytical results are illustrated by a number of numerical experiments. 
We conclude by discussing some open problems.
The ODE is then analysed numerically to investigate the possibilities for proving parametric convergence based on the coupling between the switch point and the dynamic gain.} } @TechReport{ it:2017-010, author = {Sven-Erik Ekstr{\"o}m and Stefano Serra-Capizzano}, title = {Eigenvalues and Eigenvectors of Banded {T}oeplitz Matrices and the Related Symbols}, institution = it, department = tdb, year = 2017, number = {2017-010}, month = may, abstract = {It is known that for the tridiagonal Toeplitz matrix, having the main diagonal with constant $a_0=2$ and the two first off-diagonals with constants $a_{1}=-1$ (lower) and $a_{-1}=-1$ (upper), there exists closed form formulas, giving the eigenvalues of the matrix and a set of associated eigenvectors. The latter matrix corresponds to the well known case of the 1D discrete Laplacian, but with a little care the formulas can be generalized to any triple $(a_0,a_{1},a_{-1})$ of complex values. In the first part of this article, we consider a tridiagonal Toeplitz matrix of the same form $(a_0,a_{\omega},a_{-\omega})$, but where the two off-diagonals are positioned $\omega$ steps from the main diagonal instead of only one. We show that its eigenvalues and eigenvectors also can be identified in closed form. To achieve this, ad hoc sampling grids have to be considered, in connection with a new symbol associated with the standard Toeplitz generating function. In the second part, we restrict our attention to the symmetric real case ($a_0,a_{\omega}=a_{-\omega}$ real values) and we analyze the relations with the standard generating function of the Toeplitz matrix. 
Furthermore, as numerical evidence clearly suggests, it turns out
Looking at the coefficients in detail and making use of the theory of the Generalized Locally Toeplitz (GLT) sequences, we identify the symbol of each of these blocks, that is a function describing asymptotically, i.e., for $n$ large enough, the spectrum of each block. From this spectral knowledge and thanks to some new spectral tools we retrieve the symbol of $\{\mathcal{A}_{n}\}_{n}$ which as expected is a two-by-two matrix-valued bivariate trigonometric polynomial. In particular, there is a nice elegant connection with the continuous operator, which has an infinite dimensional kernel, and in fact the symbol is a dyad having one eigenvalue like the one of the IgA Laplacian, and one identically zero eigenvalue: as a consequence, we prove that one half of the spectrum of $\mathcal{A}_{n}$, for $\nn$ large enough, is very close to zero and this represents the discrete counterpart of the infinite dimensional kernel of the continuous operator. From the latter information, we are able to give a detailed spectral analysis of the matrices $\mathcal{A}_{n}$, which is fully confirmed by several numerical evidences. Finally, by taking into consideration the GLT theory and making use of the spectral results, we furnish indications on the convergence features of known iterative solvers and we suggest proper iterative techniques for the numerical solution of the involved linear systems.} } @TechReport{ it:2017-008, author = {Mahdad Davari and Erik Hagersten and Stefanos Kaxiras}, title = {Scope-Aware Classification: Taking the Hierarchical Private/Shared Data Classification to the Next Level}, institution = it, department = docs, year = 2017, number = {2017-008}, month = apr, abstract = {Hierarchical techniques are commonplace in ameliorating the bottlenecks, such as cache coherence, in the design of scalable multi/manycores. 
Furthermore, there have been proposals to simplify the coherence based on the data-race-free semantics of the software and private/shared data classification, where cores self-invalidate their shared data upon synchronizations. However, naive private/shared data classification in the hierarchies nullifies such optimizations by increasing the amount of data misclassified as shared and therefore being needlessly self-invalidated. We introduce a private/shared data classification approach for hierarchical clusters, where a datum is concurrently classified as private and shared with respect to different classification scopes. Such scope-aware classification eliminates the needless self-invalidation of the valid data at synchronizations, resulting in a coherence scheme that reduces the average network traffic and execution time by 30\% and 5\%, respectively. } } @TechReport{ it:2017-007, author = {Torbj{\"o}rn Wigren}, title = {{MATLAB} Software for Nonlinear and Delayed Recursive Identification - Revision 1}, institution = it, department = tdb, year = 2017, number = {2017-007}, month = apr, note = {The software package can be downloaded from \url{http://www.it.uu.se/research/publications/reports/2017-007/RecursiveNonlinearNetworkedIdentificationSW.zip}.} , abstract = {This report is the user~s manual for a package of MATLAB scripts and functions, developed for recursive prediction error identification of nonlinear state space systems. The identified state space model incorporates delay, which allows a treatment of general nonlinear networked identification, as well as of general nonlinear systems with delay. The core of the package is an implementation of an output error identification algorithm. The algorithm is based on a continuous time, structured black box state space model of a nonlinear system. The software can only be run off-line, i.e. no true real time operation is possible. 
The algorithms are however implemented so that true on-line operation can be obtained by extraction of the main algorithmic loop. The user must then provide the real time environment. The software package contains scripts and functions that allow the user to either input live measurements or to generate test data by simulation. The scripts and functions for the setup and execution of the identification algorithms are somewhat more general than what is described in the references. The functionality for display of results include scripts for plotting of e.g. data, parameters, prediction errors, eigenvalues and the condition number of the Hessian. The estimated model obtained at the end of a run can be simulated and the model output plotted, alone or together with the data used for identification. Model validation is supported by two methods apart from the display functionality. First, a calculation of the RPEM loss function can be performed, using parameters obtained at the end of an identification run. Secondly, the accuracy as a function of the output signal amplitude can be assessed.} } @TechReport{ it:2017-006, author = {Karl Ljungkvist and Martin Kronbichler}, title = {Multigrid for Matrix-Free Finite Element Computations on Graphics Processors}, institution = it, department = tdb, year = 2017, number = {2017-006}, month = apr, note = {Updated 2017-04-20.}, abstract = {In this paper, we consider matrix-free finite-element techniques for efficient numerical solution of partial differential equations on modern manycore processors such as graphics cards. We present a GPU parallelization of a completely matrix-free geometric multigrid iterative solver, with support for general curved and adaptively refined meshes with hanging nodes. 
Comparing our implementation running on a Pascal P100 GPU to a highly optimized multi-core implementation running on comparable Broadwell CPUs, we demonstrate speedups of around a factor of 2x across three different Poisson-based applications and a variety of element degrees in 2D and 3D. We also show that atomic intrinsics is consistently the fastest way for shared-memory updates on the GPU, in contrast to previous architectures, and mixed-precision arithmetic can be used successfully, yielding a speedup of up to 83\% over a full double precision approach.} } @TechReport{ it:2017-005, author = {Saleh Rezaeiravesh and Mattias Liefvendahl}, title = {Grid Construction Strategies for Wall-Resolving Large Eddy Simulation and Estimates of the Resulting Number of Grid Points}, institution = it, department = tdb, year = 2017, number = {2017-005}, month = apr, abstract = {Estimates of the total number of grid points required for wall-resolving large eddy simulation (WR-LES) of canonical wall-bounded turbulent flows, corresponding to different grid construction strategies, are derived. The common basis for all strategies is that the first off-wall grid spacing is of the order of the local viscous length scale. First, the estimate of the number of grid points for the block-nested grids, which are widely used in literature to calculate the computational cost of WR-LES, is reviewed in a general setting. Then, different functions, with appropriate controlling parameters, are introduced for distributing the grid points in the wall-normal direction. By using these functions along with assuming grid spacings in the streamwise and spanwise directions to be independent of the wall-normal coordinate, block-structured grids can be constructed, for which analytical expressions are derived to show the dependency of the total number of grid points to the flow Reynolds number. 
It is shown that under equivalent conditions, this class of grids demands more grid points than the block-nested ones. In particular, for a zero-pressure-gradient turbulent boundary layer at high Reynolds numbers, the increase in the number of grid points can be up to $\mathcal{O}(10^2)$, which relaxes to up to $\mathcal{O}(10)$ for fully-developed turbulent channel flow.} } @TechReport{ it:2017-004, author = {Owe Axelsson and Maya Neytcheva and Anders Str{\"o}m}, title = {An Efficient Preconditioning Method for State Box-Constrained Optimal Control Problems}, institution = it, department = tdb, year = 2017, number = {2017-004}, month = mar, note = {Updated 2017-04-12. A major revision appears in Technical Report 2018-008, see \url{http://www.it.uu.se/research/publications/reports/2018-008}.} , abstract = {An efficient preconditioning technique used earlier for two-by-two block matrix systems with square matrices is shown to be applicable also for a state variable box-constrained optimal control problem. The problem is penalized by a standard regularization term for the control variable and for the box-constraint, using a Moreau-Yosida penalization method. It is shown that there arises very few nonlinear iteration steps and also few iterations to solve the arising linearized equations on the fine mesh. This holds for a wide range of the penalization and discretization parameters. The arising nonlinearity can be handled with a hybrid nonlinear-linear procedure that raises the computational efficiency of the overall solution method.} } @TechReport{ it:2017-003, author = {Pavol Bauer and Stefan Engblom}, title = {The {URDME} manual Version 1.3}, institution = it, department = tdb, year = 2017, number = {2017-003}, month = mar, abstract = {We have developed URDME, a general software for simulation of stochastic reaction-diffusion processes on unstructured meshes. 
This allows for a more flexible handling of complicated geometries and curved boundaries compared to simulations on structured, cartesian meshes. The underlying algorithm is the next subvolume method, extended to unstructured meshes by obtaining jump coefficients from a finite element formulation of the corresponding macroscopic equation. This manual describes version 1.3 of the software. URDME 1.3 includes support for Comsol Multiphysics 5.x and PDE Toolbox version 1.5 and above.} } @TechReport{ it:2017-002, author = {C. Garoni and S. Serra-Capizzano}, title = {{G}eneralized {L}ocally {T}oeplitz Sequences: Theory and Applications}, institution = it, department = tdb, year = 2017, number = {2017-002}, month = feb, abstract = {We present an extensive review on the theory of Generalized Locally Toeplitz (GLT) matrix-sequences, as an extension of the theory of Toeplitz operators and of the Fourier Analysis for variable-coefficients integrodifferential operators.} } @TechReport{ it:2017-001, author = {Mattias Liefvendahl and Timofey Mukha and Saleh Rezaeiravesh}, title = {Formulation of a Wall Model for {LES} in a Collocated Finite-Volume Framework}, institution = it, department = tdb, year = 2017, number = {2017-001}, month = jan, abstract = {Wall-modelled Large-Eddy Simulation (WMLES) is a turbulence modelling approach that aims to reduce the computational costs of traditional (wall-resolved) LES by introducing special treatment of the near-wall region. In this report, a detailed derivation is given of a widely used wall model which is based on implicitly enforcing the correct local wall shear stress. The model is developed in the framework of a collocated finite volume method, with unstructured meshes and arbitrary polyhedral grid cells. The expression for the law of the wall proposed by Spalding is used to relate the finite volume cell flow velocity to the wall-shear stress. 
The computational cost and predictive accuracy of the overall method is evaluated by a series of turbulent channel flow simulations covering a range of Re-numbers, from $10\,000$ to $125\,000$, based on the bulk velocity and channel half-height. For validation, the WMLES-results are compared to publicly available results from direct numerical simulations. It is demonstrated that the WMLES approach significantly reduces the computational cost, as compared to wall-resolved LES, while maintaining an acceptable accuracy of, at least, first-order statistical moments of the unknowns.} } @TechReport{ it:2016-017, author = {S.-E. Ekstr{\"o}m and S. Serra-Capizzano}, title = {Eigenvalues of Banded Symmetric {T}oeplitz Matrices are Known Almost in Close Form?}, institution = it, department = tdb, year = 2016, number = {2016-017}, month = sep, abstract = {It is well-known that the eigenvalues of (real) symmetric banded Toeplitz matrices of size $n$ are approximately given by an equispaced sampling of the symbol $f(\theta)$, up to an error which grows at most as $h=(n+1)^{-1}$, where the symbol is a real-valued cosine polynomial. Under the condition that $f$ is monotone, we show that there is hierarchy of symbols so that \[ \lambda_{j}^{(h)}-f\left(\theta_{j}^{(h)}\right)=\sum_k c_k\left(\theta_{j}^{(h)}\right)\, h^k,\quad \quad \theta_j^{(h)}=j\pi h, j=1,\ldots,n, \] with $c_k(\theta)$ higher order symbols. In the general case, a more complicate expression holds but still we find a structural hierarchy of symbols. The latter asymptotic expansions constitute a starting point for computing the eigenvalues of large symmetric banded Toeplitz matrices by using classical extrapolation methods. 
Selected numerics are shown in 1D and a similar study is briefly discussed in the multilevel setting ($d$D, $d\ge 2$) with blocks included, so opening the door to a fast computation of the spectrum of matrices approximating partial differential operators.} } @TechReport{ it:2016-016, author = {Torsten S{\"o}derstr{\"o}m and Umberto Soverini}, title = {Errors-in-Variables Identification using Maximum Likelihood Estimation in the Frequency Domain}, institution = it, department = syscon, year = 2016, number = {2016-016}, month = sep, abstract = {This report deals with the identification of errors-in-variables (EIV) models corrupted by additive and uncorrelated white Gaussian noises when the noise--free input is an arbitrary signal, not required to be periodic. In particular, a frequency domain maximum likelihood (ML) estimator is proposed and analyzed in some detail. As some other EIV estimators, this method assumes that the ratio of the noise variances is known. The estimation problem is formulated in the frequency domain. It is shown that the parameter estimates are consistent. An explicit algorithm for computing the asymptotic covariance matrix of the parameter estimates is derived. The possibility to effectively use lowpass filtered data by using only part of the frequency domain is discussed, analyzed and illustrated. } } @TechReport{ it:2016-015, author = {Afshin Zafari and Elisabeth Larsson and Marco Righero and M. Alessandro Francavilla and Giorgio Giordanengo and Francesca Vipiana and Giuseppe Vecchi}, title = {Task Parallel Implementation of a Solver for Electromagnetic Scattering Problems}, institution = it, department = tdb, year = 2016, number = {2016-015}, month = aug, abstract = {Electromagnetic computations, where the wavelength is small in relation to the geometry of interest, become computationally demanding. 
In order to manage computations for realistic problems like electromagnetic scattering from aircraft, the use of parallel computing is essential. In this paper, we describe how a solver based on a hierarchical nested equivalent source approximation can be implemented in parallel using a task based programming model. We show that the effort for moving from the serial implementation to a parallel implementation is modest due to the task based programming paradigm, and that the performance achieved on a multicore system is excellent provided that the task size, depending on the method parameters, is large enough.} } @TechReport{ it:2016-014, author = {Vasilis Spiliopoulos and Andreas Sembrant and Georgios Keramidas and Erik Hagersten and Stefanos Kaxiras}, title = {A Unified {DVFS}-Cache Resizing Framework}, institution = it, department = docs, year = 2016, number = {2016-014}, month = aug, abstract = {Cache resizing and DVFS are two well-known techniques, employed to reduce leakage and dynamic power consumption respectively. Although extensively studied, these techniques have not been explored in combination. In this work we argue that optimal frequency and cache size are highly affected by each other, therefore should be studied together. We present a framework that drives DVFS and Cache Resizing decisions in a unified, co-ordinated way. We show that MLP is the key to understand how performance is affected by both techniques and we develop an analytical model to quantify performance variation under different cache sizes and core frequencies. Finally, we expose this information to the OS and/or the application, which are responsible for setting core frequency and cache size based on energy-efficiency policies defined by the user. Our experimental results show that our model can drive DVFS and Cache Resizing decisions to reduce dynamic and static energy consumption and improve EDP by 18\% on average for SPEC2006. 
We evaluate different policies and showcase that with our model, it is trivial to build any policy involving energy-performance requirements.} } @TechReport{ it:2016-013, author = {Elias Castegren and Tobias Wrigstad}, title = {{LOLCAT}: Relaxed Linear References for Lock-free Programming}, institution = it, department = csd, year = 2016, number = {2016-013}, month = jul, abstract = {A linear reference is a reference guaranteed to be unaliased. This is a powerful property that simplifies reasoning about programs, but is also a property that is too strong for certain applications. For example, lock-free algorithms, which implement protocols to ensure safe concurrent access to data structures, are generally not typable with linear references as they involve sharing of mutable state. This paper presents a type system with a relaxed notion of linearity that allows an unbounded number of aliases to an object as long as at most one alias at a time owns the right to access the contents of the object. This ownership can be transferred between aliases, but can never be duplicated. The resulting language is flexible enough to express several lock-free algorithms and at the same time powerful enough to guarantee the absence of data-races when accessing owned data. 
The language is formalised and proven sound, and is also available as a prototype implementation.} } @TechReport{ it:2016-012, author = {Marco Donatelli and Paola Novara and Lucia Romani and Stefano Serra-Capizzano and Debora Sesana}, title = {Surface Subdivision Algorithms and Structured Linear Algebra: a Computational Approach to Determine Bounds of Extraordinary Rule Weights}, institution = it, department = tdb, year = 2016, number = {2016-012}, month = jul, abstract = {In the vicinity of extraordinary vertices, the action of a primal subdivision scheme for the construction of arbitrary topology surfaces can be represented by structured matrices that form a hybrid matrix algebra related to the block-circulant algebra. Exploiting the block diagonalization of such matrices, we can easily take into consideration the constraints to be satisfied by their eigenvalues and provide an efficient computational approach for determining the ranges of variability of the weights defining the extraordinary rules. Application examples of this computational strategy are shown to find the bounds of extraordinary rule weights for improved variants of two existing subdivision schemes.} } @TechReport{ it:2016-011, author = {Ken Mattsson}, title = {Diagonal-Norm Upwind {SBP} Operators}, institution = it, department = tdb, year = 2016, number = {2016-011}, month = jun, abstract = { High-order accurate first derivative finite difference operators are derived that naturally introduce artificial dissipation. The boundary closures are based on the diagonal-norm summation-by-parts (SBP) framework and the boundary conditions are imposed using a penalty (SAT) technique, to guarantee linear stability for a large class of initial boundary value problems. These novel first derivative SBP operators have a non-central difference stencil in the interior, and come in pairs (for each order of accuracy). The resulting SBP-SAT approximations lead to fully explicit ODE systems. 
The accuracy and stability properties are demonstrated for linear first- and second-order hyperbolic problems in 1D, and for the compressible Euler equations in 2D. The newly derived first derivative SBP operators lead to significantly more robust and accurate numerical approximations, compared with the usage of (previously derived) central difference first derivative SBP operators.} } @TechReport{ it:2016-010, author = {Afshin Zafari and Elisabeth Larsson and Martin Tillenius}, title = {Duct{T}eip: A Task-Based Parallel Programming Framework for Distributed Memory Architectures}, institution = it, department = tdb, year = 2016, number = {2016-010}, month = jun, abstract = {Current high-performance computer systems used for scientific computing typically combine shared memory compute nodes in a distributed memory environment. Extracting high performance from these complex systems requires tailored approaches. Task based parallel programming has been successful both in simplifying the programming and in exploiting the available hardware parallelism. We have previously developed a task library for shared memory systems which performs well compared with other libraries. Here we extend this to distributed memory architectures. We use a hierarchical decomposition of tasks and data in order to accommodate the different levels of hardware. Our experiments on implementing distributed Cholesky factorization show that our framework has low overhead and scales to at least 800 cores. We perform a comparison with related frameworks and show that DuctTeip is highly competitive in its class of frameworks. } } @TechReport{ it:2016-009, author = {Andreas Svensson}, title = {On the Role of {M}onte {C}arlo Methods in {S}wedish {M. Sc.} Engineering Education}, institution = it, department = syscon, year = 2016, number = {2016-009}, month = may, abstract = {Monte Carlo methods have quite recently become well-established tools for scientific computations in many different fields. 
We raise the question whether the education has followed this development, and investigate to what extent Monte Carlo methods are present in Swedish M. Sc. engineering education today. By studying the course syllabuses, we conclude that 11\% of the Swedish M. Sc. engineering students are admitted to a program with a mandatory course containing Monte Carlo methods. Further, 60\% of all students have the option to actively choose a course which at least mentions Monte Carlo methods. Courses containing Monte Carlo methods are mostly on advanced level with a clear application focus, and tend to be given at the bigger universities. We thus conclude that Monte Carlo methods have found their way into the M. Sc. Engineering education, but the question about level and to what extent has perhaps not yet reached maturity among the Swedish universities.} } @TechReport{ it:2016-008, author = {Andreas Svensson and Thomas B. Sch{\"o}n}, title = {Comparing Two Recent Particle Filter Implementations of {B}ayesian System Identification}, institution = it, department = syscon, year = 2016, number = {2016-008}, month = may, abstract = {Bayesian system identification is a theoretically well-founded and currently emerging area. We describe and evaluate two recent state-of-the-art sample-based methods for Bayesian parameter inference from the statistics literature, particle Metropolis-Hastings (PMH) and SMC$^2$, and apply them to a non-trivial real world system identification problem with large uncertainty present. 
We discuss their different properties from a user perspective, and conclude that they show similar performance in practice, while PMH is significantly easier to implement than SMC$^2$.} } @TechReport{ it:2016-007, author = {Elias Castegren and Tobias Wrigstad}, title = {Reference Capabilities for Trait Based Reuse and Concurrency Control}, institution = it, department = csd, year = 2016, number = {2016-007}, month = may, note = {Extended version of \emph{Reference Capabilities for Concurrency Control}, ECOOP 2016..}, abstract = {The proliferation of shared mutable state in object-oriented programming complicates software development as two seemingly unrelated operations may interact via an alias and produce unexpected results. In concurrent programming this manifests itself as data-races. Concurrent object-oriented programming further suffers from the fact that code that warrants synchronisation cannot easily be distinguished from code that does not. The burden is placed solely on the programmer to reason about alias freedom, sharing across threads and side-effects to deduce where and when to apply concurrency control, without inadvertently blocking parallelism. This paper presents a reference capability approach to concurrent and parallel object-oriented programming where all uses of aliases are guaranteed to be data-race free. The static type of an alias describes its possible sharing without using explicit ownership or effect annotations. Type information can express non-interfering deterministic parallelism without dynamic concurrency control, thread-locality, lock-based schemes, and guarded-by relations giving multi-object atomicity to nested data structures. Unification of capabilities and traits allows trait-based reuse across multiple concurrency scenarios with minimal code duplication. 
The resulting system brings together features from a wide range of prior work in a unified way.} } @TechReport{ it:2016-006, author = {Josefin Ahlkrona}, title = {The {ISCAL} method and the Grounding Line - Combining the {S}tokes equations with the Shallow Ice Approximation and Shelfy Stream Approximation}, institution = it, department = tdb, year = 2016, number = {2016-006}, month = apr, abstract = {The Ice Sheet Coupled Approximation Level (ISCAL) method is extended so that the non-linear Stokes equations governing ice sheet flow are coupled with a hybrid model. The hybrid model is a combination of the Shallow Ice Approximation (SIA) and the Shelfy Stream Approximation (SSA). The Stokes equations are only solved where the hybrid model is inaccurate, which is determined by an automatic error estimation. The method is tested on short simulations of coupled ice sheet/ice shelf flow. The ISCAL method applies the SIA in the high friction inland areas, the SSA at the margins and the shelf, and the Stokes equation at the grounding line, and occasionally at the dome. A method for static load balancing of the ISCAL method is proposed. } } @TechReport{ it:2016-005, author = {Josefin Ahlkrona and Victor Shcherbakov}, title = {A Meshfree Approach to Non-{N}ewtonian Free Surface Ice Flow: Application to the {H}aut {G}lacier d~{A}rolla}, institution = it, department = tdb, year = 2016, number = {2016-005}, month = apr, abstract = {Numerical models of glacier and ice sheet dynamics traditionally employ finite difference or finite element methods. Although these are highly developed and mature methods, they suffer from some drawbacks, such as inability to handle complex geometries (finite differences) or a costly assembly procedure for nonlinear problems (finite elements). Additionally, they are mesh-based, and therefore moving domains become a challenge. In this paper, we introduce a novel meshfree approach based on a radial basis function (RBF) method. 
The meshfree nature of RBF methods enables efficient handling of moving margins and free ice surface. RBF methods are also highly accurate, easy to implement, and allow for reduction the computational cost associated with the linear system assembly, since stated in strong form. To demonstrate the global RBF method we model the velocity field of ice flow in the Haut Glacier d'Arolla, which is governed by the nonlinear Stokes equations. We test the method for different basal conditions and for a free moving surface. We also compare the global RBF method with its localized counterpart---the RBF partition of unity method (RBF--PUM)---that allows for a significant gain in the computational efficiency. We find that the RBF methods are well suited for ice dynamics modelling, especially the partition of unity approach.} } @TechReport{ it:2016-004, author = {Nikos Nikoleris and Andreas Sandberg and Erik Hagersten and Trevor E. Carlson}, title = {Implementing Fast, Virtualized Profiling to Eliminate Cache Warming}, institution = it, department = docs, year = 2016, number = {2016-004}, month = mar, abstract = {Simulation is an important part of the evaluation of next-generation computing systems. Detailed, cycle-level simulation, however, can be very slow when evaluating realistic workloads on modern microarchitectures. Sampled simulation (e.g., SMARTS and SimPoint) improves simulation performance by an order of magnitude or more through the reduction of large workloads into a small but representative sample. Additionally, the execution state just prior to a simulation sample can be stored into checkpoints, allowing for fast restoration and evaluation. Unfortunately, changes in software, architecture or fundamental pieces of the microarchitecture (e.g., hardware-software co-design) require checkpoint regeneration. The end result for co-design degenerates to creating checkpoints for each modification, a task checkpointing was designed to eliminate. 
Therefore, a solution is needed that allows for fast and accurate simulation, without the need for checkpoints. Virtualized fast-forwarding proposals, like FSA, are an alternative to checkpoints that speed up sampled simulation by advancing the execution at near-native speed between simulation points. They rely, however, on functional simulation to warm the architectural state prior to each simulation point, a costly operation for moderately-sized last-level caches (e.g., above $8$MB). Simulating future systems with DRAM caches of many GBs can require warming of billions of instructions, dominating the time for simulation and negating the benefit of virtualized fast-forwarding. This paper proposes CoolSim, an efficient simulation framework that eliminates cache warming. CoolSim advances between simulation points using virtualized fast-forwarding, while collecting sparse memory reuse information (MRI). The MRI is collected more than an order of magnitude faster than functional warming. At the simulation point, detailed simulation is used to evaluate the design while a statistical cache model uses the previously acquired MRI to estimate whether each memory request hits in the cache. The MRI is an architecturally independent metric and therefore a single profile can be used in simulations of any size cache. 
We describe a prototype implementation of CoolSim based on KVM and gem5 running $19$x faster than the state-of-the-art sampled simulation, while it estimates the CPI of the SPEC CPU2006 benchmarks with $3.62\%$ error on average, across a wide range of cache sizes.} } @TechReport{ it:2016-003, author = {Torbj{\"o}rn Wigren}, title = {A Tradeoff Between Data Rate and Regulation Performance in Networked Data Flow Control}, institution = it, department = syscon, year = 2016, number = {2016-003}, month = jan, abstract = {The report investigates fundamental trade-offs related to the static regulation performance in networked control systems with delay and saturation in the feedback loop. The trade-offs are a consequence of required $\mathcal{L}_2$ stability of the networked system, in the limit where the delay tends to infinity. First a relation between the relative static regulation accuracy and the static gain of the part of the plant that affects the disturbance is derived. Based on this, the special case with one directional networked flow of an arbitrary medium is considered, where the saturation is a consequence of assumed one-directional flow. For this case it is shown that the trade-off becomes one between relative static regulation accuracy and flow capacity. In the wireless case this implies that there is a tradeoff between the relative static regulation performance and the achievable end to end Shannon data rate. 
A numerical study of a wireless transmit queue data flow controller illustrates and validates the discovered fundamental trade-offs and limitations.} } @TechReport{ it:2016-002, author = {Stefan P{\aa}lsson and Bj{\"o}rn Victor}, title = {Proceedings fr{\aa}n 5:e Utvecklingskonferensen f{\"o}r Sveriges ingenj{\"o}rsutbildningar}, institution = it, year = 2016, number = {2016-002}, month = feb, note = {In Swedish.}, abstract = {Den 5:e utvecklingskonferensen f{\"o}r Sveriges ingenj{\"o}rsutbildningar genomf{\"o}rdes den 18-19 november 2015 vid Uppsala universitet, Uppsala. Arrang{\"o}r f{\"o}r konferensen var den Teknisk-naturvetenskapliga fakulteten vid Uppsala universitet. Konferensen har sedan 2008 arrangerats vartannat {\aa}r, och {\"a}r en nationell konferens specifikt f{\"o}r Sveriges ingenj{\"o}rsutbildningar. Konferensens syfte {\"a}r att utveckla ingenj{\"o}rsutbildningarnas kvalitet genom att underl{\"a}tta kommunikation och fr{\"a}mja samarbete. Konferensen ska i f{\"o}rsta hand identifiera och diskutera aktuella och gemensamma fr{\aa}gor, det vill s{\"a}ga ge m{\"o}jlighet till erfarenhetsutbyte kring utveckling av ingenj{\"o}rsutbildning snarare {\"a}n att vara en traditionellt vetenskaplig konferens f{\"o}r rapportering av forskning om ingenj{\"o}rsutbildning. Huvudansvaret f{\"o}r Utvecklingskonferensen ligger p{\aa} de nationella organen RET/TUF-gruppen f{\"o}r civilingenj{\"o}rsutbildning tillsammans med Samverkansgruppen f{\"o}r h{\"o}gskoleingenj{\"o}rsutbildningar. De tidigare konferenserna har h{\aa}llits vid KTH 2008, Lunds Tekniska h{\"o}gskola 2009, Link{\"o}pings universitet 2011 och Ume{\aa} universitet 2013. N{\"a}sta utvecklingskonferens {\"a}r planerad till Chalmers tekniska h{\"o}gskola 2017. 
M{\aa}lgruppen f{\"o}r konferensen {\"a}r l{\"a}rare inom ingenj{\"o}rsutbildningar, personer aktiva inom programledning f{\"o}r ingenj{\"o}rsutbildning (exempelvis programansvariga, administrat{\"o}rer, studiev{\"a}gledare, studentrepresentanter), och pedagogiska ledare och utvecklare inom ingenj{\"o}rsutbildning. Konferensens teman inkluderade bl.a. aktivt l{\"a}rande, ingenj{\"o}rsrollen och programutveckling. } } @TechReport{ it:2016-001, author = {Marco Donatelli and Ali Dorostkar and Mariarosa Mazza and Maya Neytcheva and Stefano Serra-Capizzano}, title = {A Block Multigrid Strategy for Two-Dimensional Coupled {PDE}s}, institution = it, department = tdb, year = 2016, number = {2016-001}, month = jan, abstract = {We consider the solution of linear systems of equations, arising from the finite element approximation of coupled differential boundary value problems. Letting the fineness parameter tend to zero gives rise to a sequence of large scale structured two-by-two block matrices. We are interested in the efficient iterative solution of the so arising linear systems, aiming at constructing optimal preconditioning methods that are robust with respect to the relevant parameters of the problem. We consider the case when the originating systems are solved by a preconditioned Krylov method, as inner solver, and propose an efficient preconditioner for that, based on the Generalized Locally Toeplitz framework. In this paper, we exploit the almost two-level block Toeplitz structure of the arising block matrix. We provide a spectral analysis of the underlying matrices and then, by exploiting the spectral information, we design a multigrid method with an ad hoc grid transfer operator. 
As shown in the included examples, choosing the damped Jacobi or Gauss-Seidel methods as smoothers and using the resulting solver as a preconditioner leads to a competitive strategy that outperforms some aggregation-based algebraic multigrid methodss, widely employed in the relevant literature.} } @TechReport{ it:2015-038, author = {Sven-Olof Nystr{\"o}m}, title = {Subtyping and Algebraic Data Types}, institution = it, department = csd, year = 2015, number = {2015-038}, month = dec, abstract = {We present a new method for type inference. Traditional approaches rely on an inductively defined domain of types. Instead, we specify the properties of the type system as a set of axioms, and give a polynomial-time algorithm for checking whether there is any domain of types for which the program types. We show the correctness of the algorithm and also prove that safety properties are satisfied; any program accepted by the type system will not cause type errors at run-time. The advantages of our approach is that is simpler and more general. The algorithm for checking that a program types is a simple mechanism for propagating type information which should be easy to extend to support other programming language features. The type checking algorithm is more general than other approaches as the algorithm will accept any program that types under any type system that satisfies the axioms. We also show how to make type information available to compilers and other development tools through an algorithm to determine entailment. The language we consider is lambda calculus extended with a constructors and a form of type construct we call \emph{open case expressions}. 
Open case expressions allow a program to manipulate abstract data types where the sets of constructors overlap.} } @TechReport{ it:2015-037, author = {Muneeb Khan and David Black-Schaffer and Erik Hagersten}, title = {{P}erf-{I}nsight: A Simple, Scalable Approach to Optimal Data Prefetching in Multicores}, institution = it, department = docs, year = 2015, number = {2015-037}, month = dec, abstract = {Aggressive hardware prefetching is extremely beneficial for single-threaded performance but can lead to significant slowdowns in multicore processors due to oversubscription of off-chip bandwidth and shared cache capacity. This work addresses this problem by adjusting prefetching on a per-application basis to improve overall system performance. Unfortunately, an exhaustive search of all possible per-application prefetching combinations for a multicore workload is prohibitively expensive, even for small processors with only four cores. In this work we develop Perf-Insight, a simple, scalable mechanism for understanding and predicting the impact of any available hardware/software prefetching choices on applications' bandwidth consumption and performance. Our model considers the overall system bandwidth, the bandwidth sensitivity of each co-running application, and how applications' bandwidth usage and performance vary with prefetching choices. This allows us to profile individual applications and efficiently predict total system bandwidth and throughput. To make this practical, we develop a low-overhead profiling approach that scales linearly, rather than exponentially, with the number of cores, and allows us to profile applications while running in the mix. With Perf-Insight we are able to achieve an average weighted speedup of 21\% for 14 mixes of 4 applications on commodity hardware, with no mix experiencing a slowdown. This is significantly better than hardware prefetching, which only achieves an average speedup of 9\%, with three mixes experiencing slowdowns. 
Perf-Insight delivers performance very close to the best possible prefetch settings (22\%). Our approach is simple, low-overhead, applicable to any collection of prefetching options and performance metric, and suitable for dynamic runtime use on commodity multicore systems.} } @TechReport{ it:2015-036, author = {Owe Axelsson and Shiraz Farouq and Maya Neytcheva}, title = {A Preconditioner for Optimal Control Problems, Constrained by {S}tokes Equation with a Time-Harmonic Control}, institution = it, department = tdb, year = 2015, number = {2015-036}, month = dec, abstract = {In an earlier paper preconditioning of stationary Stokes-constrained optimal control problems has been considered. In this article we construct an efficient preconditioner for solving the algebraic systems arising from discretized optimal control problems with time-periodic Stokes equations. A simplified analysis of the derivation of the preconditioner and its properties is presented. The reconditioner is fully parameter independent and the condition number of the corresponding preconditioned matrix is bounded by $2$. The o-constructed preconditioner is favourably compared with another preconditioner for the same problem.} } @TechReport{ it:2015-035, author = {Johan {\"O}fverstedt and Per Normann}, title = {Deterministic Parallel Graph Coloring with Repartitioning by Auxiliary Graph Coloring}, institution = it, department = tdb, year = 2015, number = {2015-035}, month = dec, abstract = {In this paper we propose a deterministic parallel graph coloring algorithm that enables Multi-Coloring in parallel for sparse undirected graphs by coarse-grained segmentation and auxiliary graph coloring. The proposed algorithm is implemented and tested on standard problem instances from engineering applications and benchmarked against various relevant deterministic graph coloring algorithms. 
Quantified results show that the proposed algorithm is competitive or better than the sequential Multi-Coloring algorithm with respect to execution time on multi-core architectures. The upper bound on the number of colors is guaranteed to be the same as for the Multi-Coloring algorithm.} } @TechReport{ it:2015-034, author = {Adriaan Larmuseau and Marco Patrignani and Dave Clarke}, title = {Implementing a Secure Abstract Machine -- Extended Version}, institution = it, department = csd, year = 2015, number = {2015-034}, month = nov, note = {This technical report serves as the companion report to a SEC@SAC 2016 paper of the same name.}, abstract = {Abstract machines are both theoretical models used to study language properties and practical models of language implementations. As with all language implementations, abstract machines are subject to security violations by the context in which they reside. This paper presents the implementation of an abstract machine for ML that preserves the abstractions of the language it implements, in possibly malicious, low-level contexts. To guarantee this security result, we make use of a low-level memory isolation mechanism and derive the formalisation of the machine through a methodology, whose every step is accompanied by formal properties that ensure that the step has been carried out properly. 
We provide an implementation of the abstract machine and analyse its performance for relevant scenarios.} } @TechReport{ it:2015-033, author = {Volkan Cambazoglu and Ram{\=u}nas Gutkovas and Johannes {\AA}man Pohjola and Bj{\"o}rn Victor}, title = {Modelling and Analysing a WSN Secure Aggregation Protocol: A Comparison of Languages and Tool Support}, institution = it, year = 2015, number = {2015-033}, month = nov, note = {Updated 2015-12-02: The results in subsection 4.1.3 are updated because we realised that Pwb can evaluate the SHIA model faster for network sizes of 2 and 4, and also can handle network size of 8.}, abstract = {A security protocol promises protection of a significant piece of information while using it for a specific purpose. Here, the protection of the information is vital and a formal verification of the protocol is an essential step towards guaranteeing this protection. In this work, we study a secure aggregation protocol (SHIA) for Wireless Sensor Networks and verify the protocol in three formal modelling tools (Pwb, mCRL2 and ProVerif). The results of formal verification heavily depend on the model specification and the ability of the tools to deal with the model. Among the three tools, there is difference in data representation, communication types and the level of abstraction in order to represent SHIA. ProVerif and mCRL2 are mature and well-established tools, geared respectively towards security and distributed systems; however, their expressiveness constrains modelling SHIA and its security properties. Pwb is an experimental tool developed by the authors; its relative immaturity is offset by its increased expressive power and customisability. 
This leads to different models of the same protocol, each contributing in different ways to our understanding of SHIA's security properties.} } @TechReport{ it:2015-032, author = {Sofia Cassel and Falk Howar and Bengt Jonsson and Bernhard Steffen}, title = {Active Learning for Extended Finite State Machines}, institution = it, department = docs, year = 2015, number = {2015-032}, month = oct, note = {This report updates and supercedes Technical Report 2015-004.}, abstract = {We present a black-box active learning algorithm for inferring extended finite state machines (EFSM)s, combining data flow and control behavior. Different dialects of EFSMs are widely used in tools for model-based software development, verification, and testing. Our algorithm infers a class of EFSMs called \emph{register automata}. Register automata have a finite control structure, extended with variables (registers), assignments, and guards. Our algorithm is parameterized on a particular theory, i.e., a set of operations and tests on the data domain that can be used in guards. Key to our learning technique is a novel learning model based on so-called \emph{tree queries}. The learning algorithm uses the tree queries to infer symbolic data constraints on parameters, e.g., sequence numbers, time stamps, identifiers, or even simple arithmetic. We describe sufficient conditions for the properties that the symbolic constraints provided by a tree query in general must have to be usable in our learning model. We also show that, under these conditions, our framework induces a generalization of the classical Nerode equivalence and canonical automata construction to the symbolic setting. We have evaluated our algorithm in a black-box scenario, where tree queries are realized through (black-box) testing. 
Our case studies include connection establishment in TCP and a priority queue from the Java Class Library.} } @TechReport{ it:2015-031, author = {Per Normann and Johan {\"O}fverstedt}, title = {Deterministic Parallel Graph Coloring with Symmetry Breaking}, institution = it, department = tdb, year = 2015, number = {2015-031}, month = oct, abstract = {In this paper we propose a deterministic parallel graph coloring algorithm that enables Multi-Coloring in parallel for sparse undirected graphs by coarse-grained segmentation and symmetry breaking. The proposed algorithm is implemented and tested on standard problem instances from engineering applications and benchmarked against various deterministic graph coloring algorithms. Quantified results show that the proposed algorithm is competitive or better than the sequential Multi-Coloring algorithm with respect to execution time on multi-core architectures. The upper bound on the number of colors is guaranteed to be the same as for the Multi-Coloring algorithm.} } @TechReport{ it:2015-030, author = {Owe Axelsson and Shiraz Farouq and Maya Neytcheva}, title = {Comparison of preconditioned {K}rylov subspace iteration methods for {PDE}-constrained optimization problems. {S}tokes control}, institution = it, department = tdb, year = 2015, number = {2015-030}, month = sep, abstract = {The governing dynamics of fluid flow is stated as a system of partial differential equations referred to as the Navier-Stokes system. In industrial and scientific applications, fluid flow control becomes an optimization problem where the governing partial differential equations of the fluid flow are stated as constraints. When discretized, the optimal control of the Navier-Stokes equations leads to large sparse saddle point systems in two levels. In this paper we consider distributed optimal control for the Stokes system and test the particular case when the arising linear system can be compressed after eliminating the control function. 
In that case, a system arises in a form which enables the application of an efficient block matrix preconditioner that previously has been applied to solve complex-valued systems in real arithmetic. Under certain conditions the condition number of the so preconditioned matrix is bounded by 2. The numerical and computational efficiency of the method in terms of number of iterations and execution time is favorably compared with other published methods.} } @TechReport{ it:2015-029, author = {Kjellin Lagerqvist, Nanna}, title = {Resultat och reflektioner kring mailkategorisering av anv{\"a}ndares mail till Uppsala l{\"a}ns landsting kring {\aa}tkomst av journaler via n{\"a}tet}, institution = it, department = docs, year = 2015, number = {2015-029}, month = sep, note = {In Swedish. Updates and supercedes Technical Report nr 2015-010.}, abstract = {P{\aa} uppdrag av Benny Eklund, Uppsala l{\"a}ns landsting, har mail inkomna till mailboxen med adress sustains@lul.se under fj{\"a}rde kvartalet 2012 till och med f{\"o}rsta kvartalet 2014 l{\"a}sts och kategoriserats. Mailboxens syfte har varit att motta feedback fr{\aa}n anv{\"a}ndare av Sustains, senare Journalen. Nedan ben{\"a}mns b{\aa}da versionerna av systemet vid samlingsnamnet Journal via n{\"a}tet. Feedbacken var av typen fritt formulerade {\aa}sikter och erfarenheter. Anv{\"a}ndarna har inte anv{\"a}nt n{\aa}gon mall eller svarat p{\aa} n{\aa}gra specifika fr{\aa}gor utan har ombetts ber{\"a}tta fritt om sina upplevelser. Under den iterativa genomg{\aa}ngen av mailen utformades fjorton kategorier utifr{\aa}n vilka anv{\"a}ndarnas feedback sorterades. Resultatet av detta visade att de flesta anv{\"a}ndare var positiva till Journal via n{\"a}tet och det fr{\"a}msta anv{\"a}ndningsomr{\aa}det var som minnesanteckning och komplement till ett tidigare v{\aa}rdbes{\"o}k. 
{\"A}ven anv{\"a}ndare som inte hade n{\aa}gon v{\aa}rdkontakt i Uppsala l{\"a}n, och d{\"a}rf{\"o}r inte s{\aa}g n{\aa}got inneh{\aa}ll i journalen, var positiva till tj{\"a}nsten och efterfr{\aa}gade den till sina landsting.} } @TechReport{ it:2015-028, author = {Adriaan Larmuseau and Marco Patrignani and Dave Clarke}, title = {A Secure Compiler for {ML} Modules - Extended Version}, institution = it, department = csd, year = 2015, number = {2015-028}, month = sep, note = {This technical report is an updated version of TR 2015-017 that serves as the companion report to an APLAS 2015 paper of the same title.}, abstract = {Many functional programming languages compile to low-level languages such as C or assembly. Most security properties of those compilers, however, apply only when the compiler compiles whole programs. This paper presents a compilation scheme that securely compiles a standalone module of ModuleML, a light-weight version of an ML with modules, into untyped assembly. The compilation scheme is secure in that it reflects the abstractions of a ModuleML module, for every possible piece of assembly code that it interacts with. This is achieved by isolating the compiled module through a low-level memory isolation mechanism and by dynamically type checking the low-level interactions. We evaluate an implementation of the compiler on relevant test scenarios.} } @TechReport{ it:2015-027, author = {Umberto Soverini and Torsten S{\"o}derstr{\"o}m}, title = {Frequency Domain Identification of Autoregressive Models in the Presence of Additive Noise}, institution = it, department = syscon, year = 2015, number = {2015-027}, month = sep, abstract = {This paper describes a new approach for identifying autoregressive models from a finite number of measurements, in presence of additive and uncorrelated white noise. As a major novelty, the proposed approach deals with frequency domain data. In particular, two different frequency domain algorithms are proposed. 
The first algorithm is based on some theoretical results concerning the so--called dynamic Frisch Scheme. The second algorithm maps the AR identification problem into a quadratic eigenvalue problem. Both methods resemble in many aspects some other identification algorithms, originally developed in the time domain. The features of the proposed methods are compared to each other and with those of other time domain algorithms by means of Monte Carlo simulations.} } @TechReport{ it:2015-026, author = {Adriaan Larmuseau and Dave Clarke}, title = {Modelling an Assembly Attacker by Reflection}, institution = it, department = csd, year = 2015, number = {2015-026}, month = aug, note = {This technical report is an extended version of a paper titled: \emph{A High-Level Model for an Assembly Language Attacker by means of Reflection} that is to appear at SETTA 2015.}, abstract = {Many high-level functional programming languages are compiled to or interoperate with, low-level languages such as C and assembly. Research into the security of these compilation and interoperation mechanisms often makes use of high-level attacker models to simplify formalisations. In practice, however, the validity of such high-level attacker models is frequently called into question. In this technical report we formally prove that a light-weight ML-like including references, equipped with a reflection operator can serve as an accurate model for malicious assembly language programs, when reasoning about the security threats posed to the abstractions of high-level functional programs that reside within a protected memory space. 
The proof proceeds by relating a bisimulation over the inputs and observations of the assembly language attacker to a bisimulation over the inputs and observations of the high-level attacker.} } @TechReport{ it:2015-025, author = {Fritjof Fagerlund and Fredrik Hellman and Axel M{\aa}lqvist and Auli Niemi}, title = {Improved {M}onte {C}arlo Methods for Computing Failure Probabilities of Porous Media Flow Systems}, institution = it, department = tdb, year = 2015, number = {2015-025}, month = aug, abstract = {We study improvements of standard and multilevel Monte Carlo methods for point evaluation of the cumulative distribution function (failure probability) applied to porous media two-phase flow simulations with uncertain permeability. In an injection scenario with sweep efficiency of the injected phase as quantity of interest, we seek the probability that this quantity of interest is smaller than a critical value. In the sampling procedure, we use computable error bounds on the sweep efficiency functional to solve only a subset of all realizations to highest accuracy by means of what we call selective refinement. We quantify the performance gains possible by using selective refinement in combination with both the standard and multilevel Monte Carlo method. We also identify issues in the process of practical implementation of the methods. We conclude that significant savings (one order of magnitude) in computational cost are possible for failure probability estimation in a realistic setting using the selective refinement technique, both in combination with standard and multilevel Monte Carlo.} } @TechReport{ it:2015-024, author = {Owe Axelsson and Shiraz Farouq and Maya Neytcheva}, title = {Comparison of Preconditioned {K}rylov Subspace Iteration Methods for {PDE}-Constrained Optimization Problems. 
{P}oisson and Convection-Diffusion Control}, institution = it, department = tdb, year = 2015, number = {2015-024}, month = aug, abstract = {Saddle point matrices of a special structure arise in optimal control problems. In this paper we consider distributed optimal control for various types of scalar stationary partial differential problems and compare the efficiency of several numerical solution methods. We test the particular case when the arising linear system can be compressed after eliminating the control function. In that case, a system arises in a form which enables application of an efficient block matrix preconditioner that previously has been applied to solve complex-valued systems in real arithmetic. Under certain conditions the condition number of the so preconditioned matrix is bounded by 2. The numerical and computational efficiency of the method in terms of number of iterations and elapsed time is favorably compared with other published methods. } } @TechReport{ it:2015-023, author = {Carlo Garoni and Stefano Serra-Capizzano}, title = {The theory of {G}eneralized {L}ocally {T}oeplitz sequences: a review, an extension, and a few representative applications}, institution = it, department = tdb, year = 2015, number = {2015-023}, month = aug, note = {Revised, corrected, updated and extended version of TR 2015-016.}, abstract = {We review and extend the theory of Generalized Locally Toeplitz (GLT) sequences, which goes back to the pioneering work by Tilli on Locally Toeplitz (LT) sequences and was developed by the second author during the last decade. Informally speaking, a GLT sequence $\{A_n\}_n$ is a sequence of matrices with increasing size, equipped with a Lebesgue-measurable function $\kappa$ (the so-called symbol). This function characterizes the asymptotic singular value distribution of $\{A_n\}_n$; in the case where the matrices $A_n$ are Hermitian, it also characterizes the asymptotic eigenvalue distribution of $\{A_n\}_n$. 
Three fundamental examples of GLT sequences are: (i) the sequence of Toeplitz matrices generated by a function $f$ in $L^1$; (ii) the sequence of diagonal sampling matrices containing the evaluations of a Riemann-integrable function $a$ over a uniform grid; (iii) any zero-distributed sequence, i.e., any sequence of matrices possessing an asymptotic singular value distribution characterized by the identically zero function. The symbol of the GLT sequence (i) is $f$, the symbol of the GLT sequence (ii) is $a$, and the symbol of any GLT sequence of the form (iii) is 0. The set of GLT sequences is a *-algebra. More precisely, suppose that $\{A_n^{(1)}\}_n,\ldots,\{A_n^{(r)}\}_n$ are GLT sequences with symbols $\kappa_1,\ldots,\kappa_r$, and let $A_n=\textup{ops}(A_n^{(1)},\ldots,A_n^{(r)})$ be a matrix obtained from $A_n^{(1)},\ldots,A_n^{(r)}$ by means of certain algebraic operations `ops', such as linear combinations, products, inversions and Hermitian transpositions; then, $\{A_n\}_n$ is a GLT sequence with symbol $\kappa=\textup{ops}(\kappa_1,\ldots,\kappa_r)$. As already proved in several contexts, the theory of GLT sequences is a powerful apparatus for computing the asymptotic singular value and eigenvalue distribution of the discretization matrices $A_n$ arising from the numerical approximation of continuous problems, such as integral equations and, especially, partial differential equations. Indeed, when the discretization parameter $n$ tends to infinity, the discretization matrices $A_n$ give rise to a sequence $\{A_n\}_n$, which often turns out to be a GLT sequence. However, in this work we are not concerned with the applicative interest of the theory of GLT sequences, for which we limit to outline some of the numerous applications and to refer the reader to the available literature. On the contrary, we focus on the mathematical foundations. 
We propose slight (but relevant) modifications of the original definitions, and we introduce for the first time the concept of LT sequences in the multivariate/multilevel setting. With the new definitions, based on the notion of approximating class of sequences, we are able to enlarge the applicability of the theory, by generalizing and/or simplifying a lot of key results. In particular, we remove a technical hypothesis concerning the Riemann-integrability of the so-called `weight functions', which appeared in the statement of many spectral distribution and algebraic results for GLT sequences. Moreover, we provide a formal and detailed proof of the fact that the sequences of matrices mentioned in items (i)--(iii) fall in the class of LT sequences. Several versions of this result were already present in previous papers, but only partial proofs were given. As a final step, we extend the theory of GLT sequences. We first prove an approximation result, which is particularly useful to show that a given sequence of matrices is a GLT sequence. By using this result, we provide a new and easier proof of the fact that $\{A_n^{-1}\}_n$ is a GLT sequence with symbol $\kappa^{-1}$ whenever $\{A_n\}_n$ is a GLT sequence of invertible matrices with symbol $\kappa$ and $\kappa\ne 0$ almost everywhere. Finally, using again the approximation result, we prove that $\{f(A_n)\}_n$ is a GLT sequence with symbol $f(\kappa)$, as long as $f:\mathbb{R}\to\mathbb{R}$ is continuous and $\{A_n\}_n$ is a GLT sequence of Hermitian matrices with symbol $\kappa$. 
This has important implications, e.g., in proving that the geometric mean of two GLT sequences is still a GLT sequence, with symbol given by the the geometric mean of the symbols.} } @TechReport{ it:2015-022, author = {Tomas Wilkinson and Anders Brun}, title = {Experiments on Large Scale Document Visualization using Image-based Word Clouds}, institution = it, department = vi2, year = 2015, number = {2015-022}, month = jul, abstract = {In this paper, we introduce image-based word clouds as a novel tool for a quick and aesthetic overviews of common words in collections of digitized text manuscripts. While OCR can be used to enable summaries and search functionality to printed modern text, historical and handwritten documents remains a challenge. By segmenting and counting word images, without applying manual transcription or OCR, we have developed a method that can produce word- or tag clouds from document collections. Our new tool is not limited to any specific kind of text. We make further contributions in ways of stop-word removal, class based feature weighting and visualization. An evaluation of the proposed tool includes comparisons with ground truth word clouds on handwritten marriage licenses from the 17th century and the George Washington database of handwritten letters, from the 18th century. Our experiments show that image-based word clouds capture the same information, albeit approximately, as the regular word clouds based on text data.} } @TechReport{ it:2015-021, author = {Joachim Parrow and Johannes Borgstr{\"o}m and Lars-Henrik Eriksson and Ram{\=u}nas Gutkovas and Tjark Weber}, title = {Modal Logics for Nominal Transition Systems}, institution = it, department = csd, year = 2015, number = {2015-021}, month = jun, note = {Accepted for CONCUR 2015. This version includes proofs in an appendix.}, abstract = {We define a uniform semantic substrate for a wide variety of process calculi where states and action labels can be from arbitrary nominal sets. 
A Hennessy-Milner logic for these systems is introduced, and proved adequate for bisimulation equivalence. A main novelty is the use of finitely supported infinite conjunctions. We show how to treat different bisimulation variants such as early, late and open in a systematic way, and make substantial comparisons with related work. The main definitions and theorems have been formalized in Nominal Isabelle. } } @TechReport{ it:2015-020, author = {Per Normann and Johan {\"O}fverstedt}, title = {Deterministic Parallel Graph Coloring with Hashing}, institution = it, department = tdb, year = 2015, number = {2015-020}, month = jun, abstract = {In this paper we propose a new deterministic parallel graph coloring algorithm. Parallelism is achieved by distribution of vertices to processors by hashing. The hashing is based on markers assigned to each conflict prone vertex.} } @TechReport{ it:2015-019, author = {Andreas Svensson and Thomas B Sch{\"o}n and Manon Kok}, title = {Some Details on State Space Smoothing using the Conditional Particle Filter}, institution = it, department = syscon, year = 2015, number = {2015-019}, month = jun, abstract = {This technical report gives some additional details on the numerical examples used in \emph{Nonlinear state space smoothing using the conditional particle filter}, by the same authors, published in Proceedings of the 17th IFAC Symposium on System Identication (SYSID), Beijing, China, October 2015.} } @TechReport{ it:2015-018, author = {Johannes Nygren and Torbj{\"o}rn Wigren and Kristiaan Pelckmans}, title = {Delay-Independent Stability Criteria for Networked Control Systems}, institution = it, department = syscon, year = 2015, number = {2015-018}, month = jun, abstract = {The report analyses a networked control system consisting of a LTI system coupled with a static nonlinearity, subject to large delays in the feedback loop. 
This model is valid for example in wireless data flow control, where a saturation occurs since the flow is one-directional. The present report extends previous results by proving necessity, in case the loop gain is uniformly less than 1. The results are validated and illustrated in a simulation study.} } @TechReport{ it:2015-017, author = {Adriaan Larmuseau and Marco Patrignani and Dave Clarke}, title = {A Secure Compiler for {ML} Modules - Extended Version}, institution = it, department = csd, year = 2015, number = {2015-017}, month = jun, abstract = {Many functional programming languages compile to low-level languages such as C or assembly. Most security properties of those compilers, however, apply only when the compiler compiles whole programs. This paper presents a compilation scheme that securely compiles a standalone module of ModuleML, a light-weight version of an ML with modules, into untyped assembly. The compilation scheme is secure in that it reflects the abstractions of a ModuleML module, for every possible piece of assembly code that it interacts with. This is achieved by isolating the compiled module through a low-level memory isolation mechanism and by dynamically type checking the low-level interactions. 
We evaluate an implementation of the compiler on relevant test scenarios.} } @TechReport{ it:2015-016, author = {Carlo Garoni and Stefano Serra-Capizzano}, title = {{G}eneralized {L}ocally {T}oeplitz sequences: a review and an extension}, institution = it, department = tdb, year = 2015, number = {2015-016}, month = may, note = {Revised, corrected, updated and extended by TR 2015-023 (\url{http://www.it.uu.se/research/publications/reports/2015-023}).} , abstract = {We review the theory of Generalized Locally Toeplitz (GLT) sequences, hereinafter called `the GLT theory', which goes back to the pioneering work by Tilli on Locally Toeplitz (LT) sequences and was developed by the second author during the last decade: every GLT sequence has a measurable symbol; the singular value distrbution of any GLT sequence is identified by the symbol (also the eigenvalue distribution if the sequence is made by Hermitian matrices); the GLT sequences form an algebra, closed under linear combinations, (pseudo)-inverse if the symbol vanishes in a set of zero measure, product and the symbol obeys to the same algebraic manipulations. As already proved in several contexts, this theory is a powerful tool for computing/analyzing the asymptotic spectral distribution of the discretization matrices arising from the numerical approximation of continuous problems, such as Integral Equations and, especially, Partial Differential Equations, including variable coefficients, irregular domains, different approximation schemes such as Finite Differences, Finite Elements, Collocation/Galerkin Isogeometric Analysis etc. However, in this review we are not concerned with the applicative interest of the GLT theory, for which we limit to refer the reader to the numerous applications available in the literature. On the contrary, we focus on the theoretical foundations. We propose slight (but relevant) modifications of the original definitions, which allow us to enlarge the applicability of the GLT theory. 
In particular, we remove a certain `technical' hypothesis concerning the Riemann-integrability of the so-called `weight functions', which appeared in the statement of many spectral distribution and algebraic results for GLT sequences. With the new definitions, we introduce new technical and useful results and we provide a formal proof of the fact that sequences formed by multilevel diagonal sampling matrices, as well as multilevel Toeplitz sequences, fall in the class of LT sequences; the latter results were mentioned in previous papers, but no direct proof was given especially regarding the case of multilevel diagonal sampling matrix-sequences. As a final step, we extend the GLT theory: we first prove an approximation result, which is particularly useful to show that a given sequence of matrices is a GLT sequence; by using this result, we provide a new and easier proof of the fact that $\{A_n^{-1}\}_n$ is a GLT sequence with symbol $\kappa^{-1}$ whenever $\{A_n\}_n$ is a GLT sequence of invertible matrices with symbol $\kappa$ and $\kappa\ne 0$ almost everywhere; finally, using again the approximation result, we prove that $\{f(A_n)\}_n$ is a GLT sequence with symbol $f(\kappa)$, as long as $f:\mathbb{R}\to\mathbb{R}$ is continuous and $\{A_n\}_n$ is a GLT sequence of Hermitian matrices with symbol $\kappa$. This latter theoretical property has important implications, e.g. 
in proving that the geometric means of GLT sequences are still GLT, so obtaining for free that the spectral distribution of the mean is just the geometric mean of the symbols.} } @TechReport{ it:2015-015, author = {Adriaan Larmuseau and Dave Clarke}, title = {Formalizing a Secure Foreign Function Interface - Extended Version}, institution = it, department = csd, year = 2015, number = {2015-015}, month = may, note = {This technical report is an extended version of the paper of the same name that is to appear at SEFM 2015.}, abstract = {Many high-level functional programming languages provide programmers with the ability to interoperate with untyped and low-level languages such as C and assembly. Research into such interoperation has generally focused on a closed world scenario, one where both the high-level and low-level code are defined and analyzed statically. In practice, however, components are sometimes linked in at run-time through malicious means. In this paper we formalize an operational semantics that securely combines MiniML, a light-weight ML, with a model of a low-level attacker, without relying on any static checks. We prove that the operational semantics are secure by establishing that they preserve and reflect the equivalences of MiniML. To that end a notion of bisimulation for the interaction between the attacker and MiniML is developed.} } @TechReport{ it:2015-014, author = {Timofey Mukha and Mattias Liefvendahl}, title = {Large-Eddy Simulation of Turbulent Channel Flow}, institution = it, department = tdb, year = 2015, number = {2015-014}, month = may, abstract = {Channel flow, at $Re_{\tau}\approx 395$, has been computed using large-eddy simulation (LES) and the general-purpose software \texttt{OpenFOAM}. The purpose of the study is to evaluate the applicability of \texttt{OpenFOAM} for modelling wall-bounded turbulent flows, of which channel flow is a canonical example. 
A wide range of statistical quantities is reported, which includes: the mean velocity; the fluctuations of velocity; the turbulent shear stress; higher order statistical moments, skewness and flatness; the fluctuations of vorticity; spatial two-point correlations of velocity fluctuations. The results are compared with data obtained using direct numerical simulation (DNS). The simulations are carried out on three grid refinement levels and a detailed analysis of how the grid size affects the solution is given. The general conclusion that can be drawn is that \texttt{OpenFOAM} can be successfully used for LES of wall-bounded flows. The investigation of grid convergence showed that even relatively coarse grids can be used if only first order statistics are of interest. However, for higher order statistical moments, the gain of using finer grids is evident.} } @TechReport{ it:2015-013, author = {Torbj{\"o}rn Wigren}, title = {Constraints in nonlinear $\mathcal{L}_2$-stable networked control}, institution = it, department = syscon, year = 2015, number = {2015-013}, month = may, abstract = {The report derives a robust networked controller design method for systems with saturation where the delay is large and unknown, as in unidirectional flow-control. A classical linear robust criterion is first formulated in terms of the sensitivity- and complementary sensitivity functions. Based on the Popov-criterion a new asymptotic constraint is derived, which specifies the minimum amount of low frequency gain in the sensitivity function, to guarantee non-linear closed loop $\mathcal{L}_2$-stability. This result guides the selection of the design criterion, thereby adjusting the linear controller design for better handling of delay and saturation. The controller design method then uses gridding to pre-compute the $\mathcal{L}_2$ stability region. Based on the pre-computed stability region, a robust $\mathcal{L}_2$-stable controller can be selected. 
Alternatively, an adaptive controller could recompute $\mathcal{L}_2$-stable controllers on-line using the pre-computed stability region. Simulations show that the controller meets the specified stability and performance requirements.} } @TechReport{ it:2015-012, author = {Carlo Garoni and Carla Manni and Stefano Serra-Capizzano and Debora Sesana and Hendrik Speleers}, title = {Lusin Theorem, {GLT} Sequences and Matrix Computations: An Application to the Spectral Analysis of {PDE} Discretization Matrices}, institution = it, department = tdb, year = 2015, number = {2015-012}, month = mar, abstract = {We extend previous results on the spectral distribution of discretization matrices arising from B-spline Isogeometric Analysis (IgA) approximations of a general $d$-dimensional second-order elliptic Partial Differential Equation (PDE) with variable coefficients. First, we provide the spectral symbol of the Galerkin B-spline IgA stiffness matrices, assuming only that the PDE coefficients belong to $L^{\infty}$. This symbol describes the asymptotic spectral distribution when the fineness parameters tend to zero (so that the matrix-size tends to infinity). Second, we prove the positive semi-definiteness of the $d\times d$ symmetric matrix in the Fourier variables $(\theta_1,\ldots,\theta_d)$, which appears in the expression of the symbol. This matrix is related to the discretization of the (negative) Hessian operator, and its positive semi-definiteness implies the non-negativity of the symbol. The mathematical arguments used in our derivation are based on the Lusin theorem, on the theory of Generalized Locally Toeplitz (GLT) sequences, and on careful Linear Algebra manipulations of matrix determinants. 
These arguments are very general and can also be applied to other PDE discretization techniques than B-spline IgA.} } @TechReport{ it:2015-011, author = {Ali Dorostkar and Maya Neytcheva and Stefano Serra-Capizzano}, title = {Schur Complement Matrix and its (Elementwise) Approximation: A Spectral Analysis Based on {GLT} Sequences}, institution = it, department = tdb, year = 2015, number = {2015-011}, month = mar, abstract = {Using the notion of the so-called \textit{spectral symbol} in the Generalized Locally Toeplitz (GLT) setting, we derive the GLT symbol of the sequence of matrices $\{A_n\}$ approximating the elasticity equations. Further, as the GLT class defines an algebra of matrix sequences and Schur complements are obtained via elementary algebraic operation on the blocks of $A_n$, we derive the symbol $f^{\mathcal{S}}$ of the associated sequences of Schur complements $\{S_n\}$ and that of its element-wise approximation. } } @TechReport{ it:2015-010, author = {Kjellin Lagerqvist, Nanna}, title = {Resultat och reflektioner kring mailkategorisering av anv{\"a}ndares mail till Uppsala l{\"a}ns landsting kring {\aa}tkomst av journaler via n{\"a}tet}, institution = it, department = docs, year = 2015, number = {2015-010}, month = mar, note = {In Swedish. Updated and superseded by Technical Report nr 2015-029.}, abstract = {P{\aa} uppdrag av Benny Eklund, Uppsala l{\"a}ns landsting, har mail inkomna till mailboxen med adress sustains@lul.se under fj{\"a}rde kvartalet 2012 till och med f{\"o}rsta kvartalet 2014 l{\"a}sts och kategoriserats. Mailboxens syfte har varit att motta feedback fr{\aa}n anv{\"a}ndare av Sustains, senare Journalen. Nedan ben{\"a}mns b{\aa}da versionerna av systemet vid samlingsnamnet Journal via n{\"a}tet. Feedbacken var av typen fritt formulerade {\aa}sikter och erfarenheter. Anv{\"a}ndarna har inte anv{\"a}nt n{\aa}gon mall eller svarat p{\aa} n{\aa}gra specifika fr{\aa}gor utan har ombetts ber{\"a}tta fritt om sina upplevelser. 
Under den iterativa genomg{\aa}ngen av mailen utformades fjorton kategorier utifr{\aa}n vilka anv{\"a}ndarnas feedback sorterades. Resultatet av detta visade att de flesta anv{\"a}ndare var positiva till Journal via n{\"a}tet och det fr{\"a}msta anv{\"a}ndningsomr{\aa}det var som minnesanteckning och komplement till ett tidigare v{\aa}rdbes{\"o}k. {\"A}ven anv{\"a}ndare som inte hade n{\aa}gon v{\aa}rdkontakt i Uppsala l{\"a}n, och d{\"a}rf{\"o}r inte s{\aa}g n{\aa}got inneh{\aa}ll i journalen, var positiva till tj{\"a}nsten och efterfr{\aa}gade den till sina landsting.} } @TechReport{ it:2015-009, author = {Emilie Blanc}, title = {Approximation of the Diffusive Representation by Decreasing Exponential Functions}, institution = it, department = tdb, year = 2015, number = {2015-009}, month = feb, abstract = {Diffusive representation of pseudo-differential operators is a theory that has attracted interest during the last years, and the discretization of the diffusive representation allows significant simplifications for numerical integration encountered in many physical models. The feasibility of such integration relies on efficient determination of the coefficients involved in the representation. In this report a novel way to determine such coefficients is proposed based on non-linear optimization. Compared to the typical method using Gaussian quadrature, our new approach is shown to lead to more accurate approximation in a particular case with application to computational biology.} } @TechReport{ it:2015-008, author = {Ali Dorostkar and Maya Neytcheva and Stefano Serra-Capizzano}, title = {Spectral Analysis of Coupled {PDE}s and of their {S}chur Complements via the Notion of {G}eneralized {L}ocally {T}oeplitz Sequences}, institution = it, department = tdb, year = 2015, number = {2015-008}, month = feb, abstract = {We consider large linear systems of algebraic equations arising from the Finite Element approximation of coupled partial differential equations. 
As case study we focus on the linear elasticity equations, formulated as a saddle point problem to allow for modeling of purely incompressible materials. Using the notion of the so-called \textit{spectral symbol} in the Generalized Locally Toeplitz (GLT) setting, we derive the GLT symbol (in the Weyl sense) of the sequence of matrices $\{A_n\}$ approximating the elasticity equations. Further, exploiting the property that the GLT class defines an algebra of matrix sequences and the fact that the Schur complements are obtained via elementary algebraic operation on the blocks of $A_n$, we derive the symbols $f^{\mathcal{S}}$ of the associated sequences of Schur complements $\{S_n\}$. As a consequence of the GLT theory, the eigenvalues of $S_n$ for large $n$ are described by a sampling of $f^{\mathcal{S}}$ on a uniform grid of its domain of definition. We extend the existing GLT technique with novel elements, related to block-matrices and Schur complement matrices, and illustrate the theoretical findings with numerical tests. } } @TechReport{ it:2015-007, author = {Aletta Nyl{\'e}n and Mohamed Shenify}, title = {Proceedings from the 1st {A}lbaha {U}niversity--{U}ppsala {U}niversity Collaborative Symposium on Quality in Computing Education}, institution = it, year = 2015, number = {2015-007}, month = feb, abstract = {This is the proceedings from the first AlBaha University - Uppsala University Collaborative Symposium on Quality in Computing Education (ABU3QCE), held in AlBaha, Saudi Arabia, 24-25 February 2015. ABU3QCE 2015 is a local symposium dedicated to the exchange of research and practice focusing on enhancing quality in computing education. Contributions cover a broad spectrum of computing education challenges ranging from: computer science, computer engineering, computer information systems, computer information technology to software engineering education. 
ABU3QCE aims to publish research that combines teaching and learning experience with theoretically founded research within the field. The proceedings papers cover a wide range of topics such as cultural aspects of teaching and learning, technology enhanced teaching, and professional competencies and their role in the curriculum and in higher education. The symposium is a collaborative initiative of AlBaha University, Saudi Arabia, and Uppsala University, Sweden. It is our hope that this symposium will highlight current efforts, and also be the starting point for discussions, and inspire others to contribute to take the quality of computing education one step further.} } @TechReport{ it:2015-006, author = {Farshid Besharati and Mahdad Davari and Christian Danheimer Furedal and Bj{\"o}rn Forsberg and Niklas Forsmark and Henrik Grandin and Jimmy Gunnarsson and Engla Ling and Marcus Lofvars and Sven Lundgren and Luis Mauricio and Erik Norgren and Magnus Norgren and Johan Risch and Christos Sakalis and Stefanos Kaxiras}, title = {The {EVI} Distributed Shared Memory System}, institution = it, department = docs, year = 2015, number = {2015-006}, month = feb, abstract = {With the data handled by companies and research institutes getting larger and larger every day, there is a clear need for faster computing. At the same time, we have reached the limit of power consumption and more power efficient computing is also called for, both in the datacenter and in the supercomputer room. For that, there is a great push, both in industry and academia, towards increasing the amount of computing power per watt consumed. With this shift towards a different computing paradigm, many older ideas are looked upon in a new light. One of these is the distributed shared memory (DSM) systems. It is becoming harder and harder to achieve higher performance and better power efficiency at the same form factor as we have always had. 
Furthermore, while we have seen a stop in the constant increase of processor speeds, there is a constant increase in network communication speeds. Software implemented DSM is again a viable solution for high performance computing, without the need for sacrificing ease of programming for performance gains. The goal of this course was to develop such a system, and learn in the process. We chose to work with the Adapteva Parallella boards and design a DSM system there. Over one semester we designed and developed that system.} } @TechReport{ it:2015-005, author = {Carlo Garoni and Carla Manni and Stefano Serra-Capizzano and Debora Sesana and Hendrik Speleers}, title = {Spectral Analysis and Spectral Symbol of Matrices in Isogeometric {G}alerkin Methods}, institution = it, department = tdb, year = 2015, number = {2015-005}, month = jan, abstract = {A linear full elliptic second order Partial Differential Equation (PDE), defined on a $d$-dimensional domain $\Omega$, is approximated by the isogeometric Galerkin method based on uniform tensor-product B-splines of degrees $(p_1,\ldots,p_d)$. The considered approximation process leads to a $d$-level stiffness matrix, banded in a multilevel sense. This matrix is close to a $d$-level Toeplitz structure when the PDE coefficients are constant and the physical domain $\Omega$ is just the hypercube $(0,1)^d$ without using any geometry map. In such a simplified case, a detailed spectral analysis of the stiffness matrices has been carried out in a previous work. In this paper, we complete the picture by considering non-constant PDE coefficients and an arbitrary domain $\Omega$, parameterized with a non-trivial geometry map. We compute and study the spectral symbol of the related stiffness matrices. This symbol describes the asymptotic eigenvalue distribution when the fineness parameters tend to zero (so that the matrix-size tends to infinity). 
The mathematical technique used for computing the symbol is based on the theory of Generalized Locally Toeplitz (GLT) sequences.} } @TechReport{ it:2015-004, author = {Sofia Cassel and Falk Howar and Bengt Jonsson and Bernhard Steffen}, title = {Learning Extended Finite State Machines (extended version)}, institution = it, department = docs, year = 2015, number = {2015-004}, month = feb, note = {Updated and superseded by Technical Report 2015-032.}, abstract = {We present a black-box active learning algorithm for inferring extended finite state machines (EFSM)s, combining data flow and control behavior. Different dialects of EFSMs are widely used in tools for model-based software development, verification, and testing. Our algorithm infers a class of EFSMs called \emph{register automata}. Register automata have a finite control structure, extended with variables (registers), assignments, and guards. Our algorithm is parameterized on a particular theory, i.e., a set of operations and tests on the data domain that can be used in guards. Key to our learning technique is a novel learning model based on so-called \emph{tree queries}. The learning algorithm uses the tree queries to infer symbolic data constraints on parameters, e.g., sequence numbers, time stamps, identifiers, or even simple arithmetic. We describe sufficient conditions for the properties that the symbolic constraints provided by a tree query in general must have to be usable in our learning model. We also show that, under these conditions, our framework induces a generalization of the classical Nerode equivalence and canonical automata construction to the symbolic setting. We have evaluated our algorithm in a black-box scenario, where tree queries are realized through (black-box) testing. 
Our case studies include connection establishment in TCP and a priority queue from the Java Class Library.} } @TechReport{ it:2015-003, author = {Ali Dorostkar and Maya Neytcheva and Bj{\"o}rn Lund}, title = {On Some Block-Preconditioners for Saddle Point Systems and their {CPU-GPU} Performance}, institution = it, department = tdb, year = 2015, number = {2015-003}, month = jan, abstract = {In this work we emphasize some aspects of the numerical and computational performance of block preconditioners for systems with matrices of saddle point form. We discuss the quality of a sparse approximation of the related Schur complement for constructing an efficient preconditioner and the achieved numerical efficiency in terms of number of iterations. We also present a performance study of the computational efficiency of the corresponding preconditioned iterative solution methods, implemented using publicly available numerical linear algebra software packages, both on multicore CPU and GPU devices. We show that the presently available GPU accelerators can be very efficiently used in computer simulations involving inner-outer solution methods and hierarchical data structures. The benchmark problem originates from a geophysical application, namely, the elastic Glacial Isostatic Adjustment model, discretized using the finite element method.} } @TechReport{ it:2015-002, author = {Marco Donatelli and Mariarosa Mazza and Stefano Serra-Capizzano}, title = {Spectral Analysis and Structure Preserving Preconditioners for Fractional Diffusion Equations}, institution = it, department = tdb, year = 2015, number = {2015-002}, month = jan, abstract = {Fractional partial order diffusion equations are a generalization of classical partial differential equations, used to model anomalous diffusion phenomena. 
When using the implicit Euler formula and the shifted Gr{\"u}nwald formula, it has been shown that the related discretizations lead to a linear system whose coefficient matrix has a Toeplitz-like structure. In this paper we focus our attention on the case of variable diffusion coefficients. Under appropriate conditions, we show that the sequence of the coefficient matrices belongs to the Generalized Locally Toeplitz class and we compute the symbol describing its asymptotic eigenvalue distribution, as the matrix size diverges. We employ the spectral information for analyzing known methods of preconditioned Krylov and multigrid type, with both positive and negative results and with a look forward to the multidimensional setting. We also propose two new tridiagonal structure preserving preconditioners to solve the resulting linear system, with Krylov methods such as CGNR and GMRES. A number of numerical examples shows that our proposal is more effective than recently used circulant preconditioner.} } @TechReport{ it:2015-001, author = {Victor Shcherbakov and Elisabeth Larsson}, title = {Radial Basis Function Partition of Unity Methods for Pricing Vanilla Basket Options}, institution = it, department = tdb, year = 2015, number = {2015-001}, month = jan, abstract = {Mesh-free methods based on radial basis function (RBF) approximation are widely used for solving PDE problems. They are flexible with respect to the problem geometry and highly accurate. A disadvantage of these methods is that the linear system to be solved becomes dense for globally supported RBFs. A remedy is to introduce localisation techniques such as partition of unity (PU). RBF-PU methods allow for significant sparsification of the linear system and lower the computational effort. In this work we apply a global RBF method as well as an RBF-PU method to problems in option pricing. We consider one- and two-dimensional vanilla options. In order to price American options we employ a penalty approach. 
The RBF-PU method is competitive with already existing methods and the results are promising for extension to higher-dimensional problems.} } @TechReport{ it:2014-024, author = {Stefano Serra-Capizzano}, title = {{T}oeplitz Matrices: Spectral Properties and Preconditioning in the {CG} Method}, institution = it, department = tdb, year = 2014, number = {2014-024}, month = dec, abstract = {We consider multilevel Toeplitz matrices $T_n(f)$ generated by Lebes\-gue integrable functions $f$ defined over $I^d$, $I=[-\pi,\pi)$, $d\ge 1$. We are interested in the solution of linear systems with coefficient matrix $T_n(f)$ when the size of $T_n(f)$ is large. Therefore the use of iterative methods is recommended for computational and numerical stability reasons. In this note we focus our attention on the (preconditioned) conjugate gradient (P)CG method and on the case where the symbol $f$ is known and univariate ($d=1$): the second section treats spectral properties of Toeplitz matrices $T_n(f)$; the third deals with the spectral behavior of $T_n^{-1}(g) T_n(f)$ and the fourth with the band Toeplitz preconditioning; in the fifth section we consider the matrix algebra preconditioning through the Korovkin theory. Then in the sixth section we study the multilevel case $d>1$ by emphasizing the results that have a plain generalization (those in the Sections 2, 3, and 4) and the results which strongly depend on the number $d$ of levels (those in Section 5): in particular the quality of the matrix algebra preconditioners (circulants, trigonometric algebras, Hartley etc.) deteriorates sensibly as $d$ increases. 
The report is completed by a section of conclusive remarks and two appendices treating the theory of the (P)CG method and spectral distributional results of structured matrix sequences.} } @TechReport{ it:2014-023, author = {Venkatraman Iyer and Frederik Hermans and Thiemo Voigt}, title = {Detecting and Avoiding Multiple Sources of Interference in the 2.4 {GHz} Spectrum}, institution = it, department = docs, year = 2014, number = {2014-023}, month = dec, abstract = {Sensor networks operating in the 2.4 GHz band often face cross-technology interference from co-located WiFi and Bluetooth devices. To enable effective interference mitigation, a sensor network needs to know the type of interference it is exposed to. However, existing approaches to interference detection are not able to handle multiple concurrent sources of interference. In this paper, we address the problem of identifying multiple channel activities impairing a sensor network's communication, such as simultaneous WiFi traffic and Bluetooth data transfers. We present SpeckSense, an interference detector that distinguishes between different types of interference using an unsupervised learning technique. Additionally, SpeckSense features a classifier that distinguishes between moderate and heavy channel traffic, and also identifies WiFi beacons. In doing so, it facilitates interference avoidance through channel blacklisting. We evaluate SpeckSense on common mote hardware and show how it classifies concurrent interference under real-world settings. 
We also show how SpeckSense improves the performance of an existing multichannel data collection protocol by 30\%.} } @TechReport{ it:2014-022, author = {Carlo Garoni and Stefano Serra-Capizzano and Paris Vassalos}, title = {Tools for Determining the Asymptotic Spectral Distribution of {H}ermitian Matrix-Sequences and Applications}, institution = it, department = tdb, year = 2014, number = {2014-022}, month = dec, abstract = {We consider sequences of Hermitian matrices with increasing dimension, and we provide a general tool for deducing the asymptotic spectral distribution of a `difficult' sequence $\{A_n\}_n$, starting from the one of `simpler' sequences $\{B_{n,m}\}_n$ that approximate $\{A_n\}_n$ when $m\to\infty$. The tool is based on the notion of approximating class of sequences (a.c.s.), which was inspired by the work of Paolo Tilli and the second author, and is applied here in a more general setting. An a.c.s.-based proof of the famous Szeg{\H{o}} theorem on the spectral distribution of Toeplitz matrices is finally presented.} } @TechReport{ it:2014-021, author = {Carlo Garoni and Stefano Serra-Capizzano and Debora Sesana}, title = {Spectral Analysis and Spectral Symbol of $d$-variate $\mathbb{Q}_p$ {L}agrangian {FEM} Stiffness Matrices}, institution = it, department = tdb, year = 2014, number = {2014-021}, month = nov, abstract = {We study the spectral properties of the stiffness matrices coming from the $\mathbb{Q}_p$ Lagrangian FEM approximation of $d$-dimensional second order elliptic differential problems; here, $p=(p_1,\ldots,p_d)\in\mathbb{N}^d$ and $p_j$ represents the polynomial approximation degree in the $j$-th direction. After presenting a construction of these matrices, we investigate the conditioning (behavior of the extremal eigenvalues and singular values) and the asymptotic spectral distribution in the Weyl sense, and we find out the so-called (spectral) symbol describing the asymptotic spectrum. 
We also study the properties of the symbol, which turns out to be a $d$-variate function taking values in the space of $D(p)\times D(p)$ Hermitian matrices, where $D(p)=\prod_{j=1}^d p_j$. Unlike the stiffness matrices coming from the $p$\,-degree B-spline IgA approximation of the same differential problems, where a unique $d$-variate real-valued function describes all the spectrum, here the spectrum is described by $D(p)$ different functions, that is the $D(p)$ eigenvalues of the symbol, which are well-separated, far away, and exponentially diverging with respect to $p$ and $d$. This very involved picture provides a clean explanation of: a) the difficulties encountered in designing robust solvers, with convergence speed independent of the matrix size, of the approximation parameters $p$, and of the dimensionality $d$; b) the possible convergence deterioration of known iterative methods, already for moderate $p$ and $d$.} } @TechReport{ it:2014-020, author = {Magnus Grandin and Sverker Holmgren}, title = {Parallel Data Structures and Algorithms for High-Dimensional Structured Adaptive Mesh Refinement}, institution = it, department = tdb, year = 2014, number = {2014-020}, month = oct, abstract = {Numerical solution of high-dimensional partial differential equations often results in challenging computations. Using a uniform discretization of the spatial domain quickly becomes untractable due to the exponential increase in problem size with dimensionality. However, by employing a spatially adaptive discretization scheme the number of grid points can often be reduced significantly. In this note we describe a parallel version of an earlier presented adaptive scheme which generates the mesh by recursive bisection, allowing mesh blocks to be arbitrarily anisotropic to allow for fine structures in some directions without over-refining in other directions. 
We extend the serial framework by presenting parallel algorithms for organizing the mesh blocks in a distributed kd-tree and the necessary operations for implementing structured adaptive mesh refinement on a parallel computer system.} } @TechReport{ it:2014-019, author = {Magnus Grandin}, title = {Data Structures and Algorithms for High-Dimensional Structured Adaptive Mesh Refinement}, institution = it, department = tdb, year = 2014, number = {2014-019}, month = oct, abstract = {Spatial discretization of high-dimensional partial differential equations requires data representations that are of low overhead in terms of memory and complexity. Uniform discretization of computational domains quickly grows out of reach due to an exponential increase in problem size with dimensionality. Even with spatial adaptivity, the number of mesh data points can be unnecessarily large if care is not taken as to where refinement is done. We propose an adaptive scheme that generates the mesh by recursive bisection, allowing mesh blocks to be arbitrarily anisotropic to allow for fine structures in some directions without over-refining in those directions that suffice with less refinement. We describe in detail how the mesh blocks are organized in a kd-tree and the algorithms that update the mesh as is necessary for preserved accuracy in the solution. Algorithms for refinement, coarsening and 2:1 balancing of a mesh hierarchy are derived, and we describe how information is retrieved from the tree structure by means of binary search. To show the capabilities of our framework, we present results showing examples of generated meshes and evaluate the algorithmic scalability on a suite of test problems. 
In summary, we conclude that although the worst-case complexity of sorting the nodes and building the node map index is $n^2$, the average runtime scaling in our examples is no worse than $n \log n$.} } @TechReport{ it:2014-018, author = {Olov Ros{\'e}n and Alexander Medvedev}, title = {Parallelization of the Particle Filter via Series Expansions}, institution = it, department = syscon, year = 2014, number = {2014-018}, month = aug, abstract = {Parallelizability of an algorithm is nowadays a highly desirable property as computer hardware is becoming increasingly parallel. In this paper, a formulation of the particle filtering algorithm, suitable for parallel or distributed computing, is proposed. From the particle set, a series expansion is fitted to the posterior probability density function. The global information expressed by the particles can in this way be compressed to a few informative coefficients that can be efficiently communicated between the local processing units. Experiments on a shared-memory multicore processor using up to 8 cores show that linear speedup in the number of used cores is achieved.} } @TechReport{ it:2014-017, author = {David R. Cok and Aaron Stump and Tjark Weber}, title = {The 2013 {SMT} Evaluation}, institution = it, department = csd, year = 2014, number = {2014-017}, month = jul, abstract = {After 8 years of SMT Competitions, the SMT Steering Committee decided, for 2013, to sponsor an evaluation of the status of SMT benchmarks and solvers, rather than another competition. This report summarizes the results of the evaluation, conducted by the authors. The key observations are that (1) the competition results are quite sensitive to randomness and (2) the most significant need for the future is assessment and improvement of benchmarks in the light of SMT applications. 
The evaluation also measured competitiveness of solvers, general coverage of solvers, logics, and benchmarks, and degree of repeatability of measurements and competitions.} } @TechReport{ it:2014-016, author = {Torbj{\"o}rn Wigren}, title = {Identifiability and Limit Cycles}, institution = it, department = syscon, year = 2014, number = {2014-016}, month = may, abstract = {The report discusses when the non-linear dynamic equations of a non-linear system in a limit-cycle can be determined from measured data. The minimal order needed for this turns out to be the minimal dimension in which the stable orbit of the system does not intersect itself. This is illustrated with a fourth order spiking neuron model, which is identified using a non-linear second order differential equation model.} } @TechReport{ it:2014-015, author = {Sven-Olof Nystr{\"o}m}, title = {Subtyping, consistency and derivability}, institution = it, department = csd, year = 2014, number = {2014-015}, month = may, abstract = {Earlier work on subtyping has focused on the problem of constructing a typing for a given program. This paper considers a slightly different problem: Given a lambda term, is the corresponding constraint system \emph{consistent}? An $O(n^3)$ algorithm for checking the consistency of constraint systems is presented, where $n$ is the size of the constraint system. 
The paper also considers the problem of \emph{derivability}, i.e., whether a property can be derived from the corresponding constraint system and gives an $O(n^3)$ algorithm for checking derivability of a class of constraints.} } @TechReport{ it:2014-014, author = {Torbj{\"o}rn Wigren}, title = {{MATLAB} Software for Identification of Nonlinear Autonomous Systems - Revision 1}, institution = it, department = syscon, year = 2014, number = {2014-014}, month = apr, note = {The software package can be downloaded from \url{http://www.it.uu.se/research/publications/reports/2014-014/SWAutonomous.zip}.} , abstract = {This report is intended as a user's manual for a package of MATLAB scripts and functions, developed for recursive and batch identification of nonlinear autonomous state space models of order 2. The core of the package consists of implementations of four algorithms for this purpose. There are two least squares batch schemes and two recursive algorithms based on Kalman filtering techniques. The algorithms are based on a continuous time, structured black box state space model of a nonlinear autonomous system of order 2. The software can only be run off-line, i.e. no true real time operation is possible. The recursive algorithms are however implemented so that true on-line operation can be obtained by extraction of the main algorithmic loops. The user must then provide the real time environment. The software package contains scripts and functions that allow the user to either input live measurements or to generate test data by simulation. The functionality for display of results includes scripts for plotting of data and parameters. 
The estimated model obtained at the end of a run can be simulated and the model output plotted, alone or together with the data used for identification.} } @TechReport{ it:2014-013, author = {Thomas Lind and {\AA}sa Cajander and Bengt Sandblad}, title = {Inf{\"o}randet av {R}aindance Bokf{\"o}ringsportal vid Uppsala universitet - En kortfattad rapport fr{\aa}n {KiA}-projektet}, institution = it, department = vi2, year = 2014, number = {2014-013}, month = apr, note = {In Swedish.}, abstract = {Under senv{\aa}ren och h{\"o}sten 2013 genomf{\"o}rdes en studie f{\"o}r att dra l{\"a}rdomar av inf{\"o}randet som skedde inf{\"o}r {\aa}rsskiftet 2012-2013 utav Bokf{\"o}ringsportalen, en del av ekonomisystemet Raindance vid Uppsala universitet. Ut{\"o}ver Bokf{\"o}ringsportalen har studien {\"a}ven ber{\"o}rt fr{\aa}gor om inf{\"o}rande av IT kopplat till avdelningen f{\"o}r ekonomi och upphandling mer generellt. Studien har utf{\"o}rts inom ramen f{\"o}r KiA-projektet och baseras p{\aa} fyra intervjuer med personal vid universitetsf{\"o}rvaltningens avdelning f{\"o}r ekonomi och upphandling samt sex intervjuer med ekonomiadministrat{\"o}rer fr{\aa}n olika institutioner vid universitetet. Intervjuerna visar p{\aa} ett brett spektra av uppfattningar och inst{\"a}llningar till inf{\"o}randet av Bokf{\"o}ringsportalen, d{\"a}r vissa anser att hela deras arbetssituation har f{\"o}r{\"a}ndrats medan andra ser f{\"o}r{\"a}ndringen som knappt m{\"a}rkbar. Synen p{\aa} f{\"o}r{\"a}ndringen som positiv eller negativ varierar ocks{\aa} bland respondenterna. 
Vitt skilda f{\"o}rh{\aa}llanden i organisationen av ekonomiadministrat{\"o}rernas roller och arbetsuppgifter, samt faktorer r{\"o}rande institutionerna i {\"o}vrigt, bidrar till att skapa v{\"a}ldigt olika f{\"o}ruts{\"a}ttningar vid f{\"o}r{\"a}ndring av IT som p{\aa}verkar administrat{\"o}rernas arbetsmilj{\"o}.} } @TechReport{ it:2014-012, author = {Per L{\"o}tstedt and Lina Meinecke}, title = {Simulation of Stochastic Diffusion via First Exit Times}, institution = it, department = tdb, year = 2014, number = {2014-012}, month = apr, abstract = {In molecular biology it is of interest to simulate diffusion stochastically. In the mesoscopic model we partition a biological cell into voxels in an unstructured mesh. In each voxel the number of molecules is recorded at each time step and molecules can jump between neighboring voxels to model diffusion. The jump rates are computed by discretizing the diffusion equation on the unstructured mesh. If the mesh is of poor quality, due to a complicated cell geometry, standard discretization methods can generate negative jump coefficients, which no longer allows the interpretation as the probability to leave the subvolume. We propose a method based on the mean first exit time of a molecule from a subvolume, which guarantees positive jump coefficients. 
Two approaches to exit times, a global and a local one, are presented and tested in simulations with diffusion and chemical reactions on meshes of different quality in two dimensions.} } @TechReport{ it:2014-011, author = {Martin Tillenius and Elisabeth Larsson and Erik Lehto and Natasha Flyer}, title = {A Task Parallel Implementation of an {RBF}-generated Finite Difference Method for the Shallow Water Equations on the Sphere}, institution = it, department = tdb, year = 2014, number = {2014-011}, month = apr, abstract = {Radial basis function-generated finite difference (RBF-FD) methods have recently been proposed as very interesting for global scale geophysical simulations, and have been shown to outperform established pseudo-spectral and discontinuous Galerkin methods for shallow water test problems. In order to be competitive for very large scale simulations, the implementation of the RBF-FD methods needs to be efficient and adapted for modern multicore based computer architectures. The main computational operations in the method consist of unstructured sparse matrix-vector multiplications, which are in general not well suited for multicore-based computers. In this work, the method is implemented for clusters of multicore computers using a task-based parallel programming model. Performance experiments showed that our implementation achieves 71\% of theoretical speedup within one computational node, and 90--100\% of linear speedup between nodes. A speedup of 178 times compared with the original MATLAB implementation was achieved for a global shallow water problem with a 30km resolution. 
} } @TechReport{ it:2014-010, author = {Martin Tillenius}, title = {{SuperGlue}: A Shared Memory Framework using Data-Versioning for Dependency-Aware Task-Based Parallelization}, institution = it, department = tdb, year = 2014, number = {2014-010}, month = apr, abstract = {In computational science, making efficient use of modern multicore based computer hardware is necessary in order to deal with complex real-life application problems. However, with increased hardware complexity, the cost in man hours of writing and re-writing software to adapt to evolving computer systems is becoming prohibitive. Task based parallel programming models aim to allow the application programmers to focus on the algorithms and applications, while the performance is handled by a runtime system that schedules the tasks onto nodes, cores, and accelerators. In this paper we describe a task parallel programming model where dependencies are represented through data versioning. The benefits of using this type of model are that it is easy to represent different types of dependencies and that scheduling decisions can be made locally. We implement this as a freely available C++ header-only template library, and show experimental results indicating that our implementation scales well, and performs well in comparison to other related efforts.} } @TechReport{ it:2014-009, author = {Torbj{\"o}rn Wigren}, title = {On a Limitation in Networked Flow Control}, institution = it, department = syscon, year = 2014, number = {2014-009}, month = mar, abstract = {The paper analyzes a continuous time flow control system, with flow of a general quantity from a source node to a sink node. The flow is one-directional, meaning that there is a saturation between the nodes that limits the flow to be positive and below a maximum. The controlled plant is located in the sink node and the controller is located in the source node. 
The plant and the controller are modeled by linear filters parameterized with poles and zeros. Feed forward control from measured disturbances is included. Delays affect both the downlink control signal and the uplink measurement signals. The paper proves that for large delays, $L_2$-stability does not follow from the Popov criterion unless the quotient of the products of all zeros and the product of all poles is less than $1/kG_p$, where $k$ is the slope of the saturation and $G_p$ is the gain constant of the loop gain. In case the plant models a leaky reservoir, the conclusion is that the amount of low frequency gain of the controller cannot be arbitrarily high at the same time as the amount of leakage of the reservoir is arbitrarily low. In communications this means that an increased requirement to regulate static errors of the reservoir needs to be accompanied by a reduced flow capacity.} } @TechReport{ it:2014-008, author = {Torsten S{\"o}derstr{\"o}m and Roberto Diversi and Umberto Soverini}, title = {A Generalized Instrumental Variable Framework for {EIV} Identification Methods when the Measurement Noises are Mutually Correlated}, institution = it, department = syscon, year = 2014, number = {2014-008}, month = mar, abstract = {In this paper, the previously introduced Generalized Instrumental Variable Estimator (GIVE) is extended to the case of errors-in-variables models where the additive input and output noises are mutually correlated white processes. It is shown how many estimators proposed in the literature can be described as various special cases of a generalized instrumental variable framework. 
It is also investigated how to analyze the common situation where some of the equations that define the estimator are to hold exactly, and others to hold approximately in a least squares sense, providing a detailed study of the accuracy analysis.} } @TechReport{ it:2014-007, author = {Ali Dorostkar and Dimitar Lukarski and Bj{\"o}rn Lund and Maya Neytcheva and Yvan Notay and Peter Schmidt}, title = {Parallel Performance Study of Block-Preconditioned Iterative Methods on Multicore Computer Systems}, institution = it, department = tdb, year = 2014, number = {2014-007}, month = mar, abstract = {In this work we benchmark the performance of a preconditioned iterative method, used in large scale computer simulations of a geophysical application, namely, the elastic Glacial Isostatic Adjustment model. The model is discretized using the finite element method. It gives raise to algebraic systems of equations with matrices that are large, sparse, nonsymmetric, indefinite and with a saddle point structure. The efficiency of solving systems of the latter type is crucial as it is to be embedded in a time-evolution procedure, where systems with matrices of similar type have to be solved repeatedly many times. The computer code for the simulations is implemented using available open source software packages - Deal.ii, Trilinos, PARALUTION and AGMG. These packages provide toolboxes with state-of-art implementations of iterative solution methods and preconditioners for multicore computer platforms and GPU. 
We present performance results in terms of numerical and computational efficiency, number of iterations and execution time, and compare the timing results against a sparse direct solver from a commercial finite element package, that is often used by applied scientists in their simulations.} } @TechReport{ it:2014-006, author = {H{\"o}{\"o}k, Lars Josef}, title = {On the Bimodality in an Introductory Programming Course: an Analysis of Student Performance Factors}, institution = it, department = tdb, year = 2014, number = {2014-006}, month = mar, abstract = {In this work, the student performance in an introductory programming course given at the department of information technology at Uppsala University is analyzed. The results from the final exam are studied and the potential bimodality of the distribution is investigated. A questionnaire, answered by the students at the final exam is analyzed, including study behavior and potential connections to the performance at the final exam.} } @TechReport{ it:2014-005, author = {Andreas Sandberg and Erik Hagersten and David Black-Schaffer}, title = {Full Speed Ahead: Detailed Architectural Simulation at Near-Native Speed}, institution = it, department = docs, year = 2014, number = {2014-005}, month = mar, abstract = {Popular microarchitecture simulators are typically several orders of magnitude slower than the systems they simulate. This leads to two problems: First, due to the slow simulation rate, simulation studies are usually limited to the first few billion instructions, which corresponds to less than 10\% the execution time of many standard benchmarks. Since such studies only cover a small fraction of the applications, they run the risk of reporting unrepresentative application behavior unless sampling strategies are employed. Second, the high overhead of traditional simulators make them unsuitable for hardware/software co-design studies where rapid turn-around is required. 
In spite of previous efforts to parallelize simulators, most commonly used full-system simulations remain single threaded. In this paper, we explore a simple and effective way to parallelize sampling full-system simulators. In order to simulate at high speed, we need to be able to efficiently fast-forward between sample points. We demonstrate how hardware virtualization can be used to implement highly efficient fast-forwarding in the standard gem5 simulator and how this enables efficient execution between sample points. This extremely rapid fast-forwarding enables us to reach new sample points much quicker than a single sample can be simulated. Together with efficient copying of simulator state, this enables parallel execution of sample simulation. These techniques allow us to implement a highly scalable sampling simulator that exploits sample-level parallelism. We demonstrate how virtualization can be used to fast-forward simulators at 90\% of native execution speed on average. Using virtualized fast-forwarding, we demonstrate a parallel sampling simulator that can be used to accurately estimate the IPC of standard workloads with an average error of 2.2\% while still reaching an execution rate of 2.0 GIPS (63\% of native) on average. We demonstrate that our parallelization strategy scales almost linearly and simulates one core at up to 93\% of its native execution rate, 19,000x faster than detailed simulation, while using 8 cores.} } @TechReport{ it:2014-004, author = {Emil Kieri}, title = {Stiff Convergence of Force-Gradient Operator Splitting Methods}, institution = it, department = tdb, year = 2014, number = {2014-004}, month = feb, abstract = {We consider force-gradient, also called modified potential, operator splitting methods for problems with unbounded operators. 
We prove that force-gradient operator splitting schemes retain their classical orders of accuracy for time-dependent partial differential equations of parabolic or Schr{\"o}dinger type, provided that the solution is sufficiently regular.} } @TechReport{ it:2014-003, author = {{\AA}sa Cajander and Thomas Lind and Gerolf Nauwerck and Britta Nyberg and Bengt Sandblad}, title = {Ekonomiadministrat{\"o}rernas {IT}-relaterade arbetsmilj{\"o} vid {U}ppsala universitet - En kortfattad rapport fr{\aa}n {KIA}-projektet}, institution = it, department = vi2, year = 2014, number = {2014-003}, month = feb, note = {In Swedish.}, abstract = { Denna rapport inneh{\aa}ller en kartl{\"a}ggning av ekonomiadministrat{\"o}rers, ekonomiassistenters och ekonomiadministrat{\"o}rers digitala arbetsmilj{\"o} p{\aa} Uppsala universitet. Rapporten {\"a}r skriven med utg{\aa}ngspunkt fr{\aa}n att l{\"a}saren har inblick i hur ekonomiassistenter ekonomiadministrat{\"o}rer arbetar idag p{\aa} Uppsala universitet, samt har kunskap kring de olika IT-system som anv{\"a}nds. Kartl{\"a}ggningen gjordes under h{\"o}sten 2013 d{\aa} 12 ekonomiassistenter ekonomiadministrat{\"o}rer bes{\"o}ktes under 2-3 timmar och metoden Contextual Inquiry anv{\"a}ndes. Inledningsvis f{\"o}rklaras begreppen arbetsmilj{\"o} och IT, och Robert Karaseks modell med dimensionerna krav kontroll och st{\"o}d presenteras. Dessa dimensioner ligger sedan till grund f{\"o}r kartl{\"a}ggningen som kortfattat presenterar de problem som har identifierats kopplat till den digitala arbetsmilj{\"o}n. N{\aa}gra av de problem som identifierats i kartl{\"a}ggningen {\"a}r avsaknaden av {\"o}verblick, st{\"a}ndiga avbrott, och fragmenterad information. 
Rapporten avslutas med en lista med rekommendationer p{\aa} f{\"o}rb{\"a}ttringar.} } @TechReport{ it:2014-002, author = {Peter Hellstr{\"o}m}, title = {Problems in the Integration of Timetabling and Train Traffic Control}, institution = it, department = vi2, year = 2014, number = {2014-002}, month = jan } @TechReport{ it:2014-001, author = {Martin Tillenius and Elisabeth Larsson and Rosa M. Badia and Xavier Martorell}, title = {Resource-Aware Task Scheduling}, institution = it, department = tdb, year = 2014, number = {2014-001}, month = jan, abstract = {Dependency-aware task-based parallel programming models have proven to be successful for developing efficient application software for multicore-based computer architectures. The programming model is amenable to programmers, thereby supporting productivity, while hardware performance is achieved through a run-time system that dynamically schedules tasks onto cores in such a way that all dependencies are respected. However, even if the scheduling is completely successful with respect to load balancing, the scaling with the number of cores may be sub-optimal due to resource contention. Here we consider the problem of scheduling tasks not only with respect to their inter-dependencies, but also with respect to their usage of resources such as memory and bandwidth. At the software level, this is achieved by user annotations of the task resource consumption. In the run-time system, the annotations are translated into scheduling constraints. Experimental results for different hardware, demonstrating performance gains both for model examples and real applications are presented. Furthermore, we provide a set of tools to detect resource sensitivity and predict the performance improvements that can be achieved by resource-aware scheduling. 
These tools are solely based on parallel execution traces and require no instrumentation or modification of the application code.} } @TechReport{ it:2013-027, author = {Bengt Sandblad}, title = {Checklista f{\"o}r god arbetsmilj{\"o} vid datorst{\"o}tt arbete}, institution = it, department = vi2, year = 2013, number = {2013-027}, month = dec, note = {In Swedish.}, abstract = {Redan idag sk{\"o}ts det mesta av allt arbete inom administrativt arbete, {\"a}rendehantering m.m. med hj{\"a}lp av datorer. Att arbetet {\"a}r datorst{\"o}tt inneb{\"a}r givetvis inte att de professionella som ska utf{\"o}ra det direkta arbetet har som huvuduppgift att hantera en dator. De ska kunna utf{\"o}ra sitt arbete p{\aa} ett effektivt s{\"a}tt och ha en s{\aa} bra arbetssituation som m{\"o}jligt. Sj{\"a}lvfallet ska ocks{\aa} arbetet pr{\"a}glas av en god arbetsmilj{\"o}. Det h{\"a}r dokumentet beskriver hur man kan utv{\"a}rdera arbetsmilj{\"o}- och h{\"a}lsoaspekter i syfte att f{\"o}rebygga eller {\aa}tg{\"a}rda olika slags arbetsmilj{\"o}problem. Inneh{\aa}llet utg{\aa}r i m{\aa}ngt och mycket fr{\aa}n att arbetet utf{\"o}rs med hj{\"a}lp av IT-st{\"o}d, men en hel del {\"a}r relevant {\"a}ven f{\"o}r arbeten som inte i huvudsak st{\"o}ds av IT. Basen i utv{\"a}rderingen {\"a}r en checklista som finns i slutet av dokumentet. Checklistan kan anv{\"a}ndas f{\"o}r att utv{\"a}rdera befintliga arbeten, men den kan ocks{\aa} anv{\"a}ndas i f{\"o}rebyggande syfte. } } @TechReport{ it:2013-026, author = {Sofia Cassel and Falk Howar and Bengt Jonsson and Maik Merten and Bernhard Steffen}, title = {A Succinct Canonical Register Automaton Model}, institution = it, department = docs, year = 2013, number = {2013-026}, month = dec, note = {This is an extended version of a paper published in ATVA 2011. 
The extended version has been accepted for publication in JLAP.}, abstract = {We present a novel canonical automaton model, based on register automata, that can be used to specify protocol or program behavior. Register automata have a finite control structure and a finite number of registers (variables), and process sequences of terms that carry data values from an infinite domain. We consider register automata that compare data values for equality. A major contribution is the definition of a canonical automaton representation of any language recognizable by a deterministic register automaton, by means of a Nerode congruence. This canonical form is well suited for modeling, e.g., protocols or program behavior. Our model can be exponentially more succinct than previous proposals, since it filters out 'accidental' relations between data values. This opens the way to new practical applications, e.g., in automata learning.} } @TechReport{ it:2013-025, author = {Jonatan Lind{\'e}n and Bengt Jonsson}, title = {A Skiplist-Based Concurrent Priority Queue with Minimal Memory Contention}, institution = it, department = docs, year = 2013, number = {2013-025}, month = dec, note = {Updated by Technical Report 2018-003, February 2018. See \url{http://www.it.uu.se/research/publications/reports/2018-003}.} , abstract = {Priority queues are fundamental to many multiprocessor applications. Several priority queue algorithms based on skiplists have been proposed, as skiplists allow concurrent accesses to different parts of the data structure in a simple way. However, for priority queues on multiprocessors, an inherent bottleneck is the operation that deletes the minimal element. We present a linearizable, lock-free, concurrent priority queue algorithm, based on skiplists, which minimizes the contention for shared memory that is caused by the {\sc DeleteMin} operation. The main idea is to minimize the number of global updates to shared memory that are performed in one {\sc DeleteMin}. 
In comparison with other skiplist-based priority queue algorithms, our algorithm achieves a 30 -- 80\% improvement.} } @TechReport{ it:2013-024, author = {Thomas Lind and Fredrik Brattl{\"o}f and {\AA}sa Cajander and Bengt Sandblad and Bengt G{\"o}ransson and Anders Jansson}, title = {F{\"o}rstudierapport: Inf{\"o}rande av verksamhetsst{\"o}djande {IT}-system. Problem, effekter och nytta}, institution = it, department = vi2, year = 2013, number = {2013-024}, month = dec, note = {In Swedish.}, abstract = {Under h{\"o}sten 2011 har en f{\"o}rstudie genomf{\"o}rts f{\"o}r att kartl{\"a}gga och analysera dagens processer f{\"o}r utveckling, anskaffande, inf{\"o}rande och utv{\"a}rdering av administrativa IT-system vid Uppsala universitet (UU). F{\"o}rstudien, Inf{\"o}rande av verksamhetsst{\"o}djande IT-system. Problem, effekter och nytta, har genomf{\"o}rts av forskare vid avdelningen f{\"o}r m{\"a}nniska-datorinteraktion (MDI) vid institutionen f{\"o}r informationsteknologi, UU. F{\"o}rstudien har gjorts p{\aa} uppdrag av universitetsdirekt{\"o}r Ann Fust och kontaktperson har varit Britta Nyberg, enheten f{\"o}r planering, uppf{\"o}ljning och st{\"o}d f{\"o}r verksamhetsutveckling. Arbetet har dels best{\aa}tt av en kartl{\"a}ggning och analys av dagens utvecklings- och inf{\"o}randeprocesser och de problem man upplever i samband med dessa, dels n{\aa}gra analyser av hur anv{\"a}ndare uppfattar de IT-system man idag har. Sammanfattningsvis har kartl{\"a}ggningen och analysen visat att det finns en stor f{\"o}rb{\"a}ttringspotential n{\"a}r det g{\"a}ller processer f{\"o}r verksamhets- och IT-utveckling samt hur anv{\"a}ndbarhetsaspekter beaktas i utveckling, anskaffande, inf{\"o}rande och utv{\"a}rdering av IT-system. Kartl{\"a}ggningen av processer som anv{\"a}nds vid UU visar att det inte finns n{\aa}gra enhetliga processer som anv{\"a}nds generellt inom UU, men att det lokalt finns fungerande s{\aa}dana. 
N{\aa}gra exempel {\"a}r f{\"o}rvaltningen som sedan n{\aa}gra {\aa}r arbetar enligt PM3, och avdelningen f{\"o}r IT och ink{\"o}p som anv{\"a}nder sig av ITIL. I de idag p{\aa}g{\aa}ende IT-projekten varierar arbetss{\"a}ttet mycket. Det finns inga rekommenderade processer eller arbetss{\"a}tt utan projektledaren l{\"a}gger upp arbetet baserat p{\aa} egna erfarenheter. Inom UU arbetar man generellt inte med anv{\"a}ndarcentrerade processer, och anv{\"a}ndbarhetsaspekter {\"a}r inte prioriterade. Inom vissa projekt och i viss f{\"o}rvaltning f{\"o}rekommer en del arbete med fokus p{\aa} anv{\"a}ndbarhet. Referensgrupper anv{\"a}nds som en form av anv{\"a}ndarmedverkan, men med olika m{\aa}l och syften. Inf{\"o}randeprocesser upplevs generellt som problematiska inom UU och h{\"a}r finns p{\aa}tagliga problem. Den upplevda anv{\"a}ndbarheten i de administrativa IT-systemen varierar, men generellt anser f{\"o}rstudiens intervjupersoner att det finns stor f{\"o}rb{\"a}ttringspotential. Fallstudien av Raindanceportalen visar att olika anv{\"a}ndargrupper upplever systemet p{\aa} helt olika s{\"a}tt. De som har utbildning i Raindance (ekonomiadministrat{\"o}rer) och anv{\"a}nder det i sina k{\"a}rnuppgifter upplever att systemet i ganska stor utstr{\"a}ckning st{\"o}der arbetet. {\"O}vriga anv{\"a}ndare som anv{\"a}nder systemet mer s{\"a}llan och som administrativt st{\"o}dsystem har klart st{\"o}rre problem med systemet. 
I en avslutande diskussion ger vi en sammanst{\"a}llning av de observerade problemen, ger f{\"o}rslag p{\aa} vad som fordras f{\"o}r att utveckla kunskaper kring problem och m{\"o}jligheter och f{\"o}r att inf{\"o}ra f{\"o}rb{\"a}ttrade processer, samt f{\"o}r att -- p{\aa} sikt -- f{\aa} b{\"a}ttre och effektivare administrativa IT-system inom UU.} } @TechReport{ it:2013-023, author = {Ali Safdari-Vaighani and Alfa Heryudono and Elisabeth Larsson}, title = {A Radial Basis Function Partition of Unity Collocation Method for Convection-Diffusion Equations}, institution = it, department = tdb, year = 2013, number = {2013-023}, month = nov, abstract = {Numerical solution of multi-dimensional PDEs is a challenging problem with respect to computational cost and memory requirements, as well as regarding representation of realistic geometries and adaption to solution features. Meshfree methods such as global radial basis function approximation have been successfully applied to several types of problems. However, due to the dense linear systems that need to be solved, the computational cost grows rapidly with dimension. In this paper, we instead propose to use a locally supported RBF collocation method based on a partition of unity approach to numerically solve time-dependent PDEs. We investigate the stability and accuracy of the method for convection-diffusion problems in two space dimensions as well as for an American option pricing problem. The numerical experiments show that we can achieve both spectral and high-order algebraic convergence for convection-diffusion problems, and that we can reduce the computational cost for the option pricing problem by adapting the node layout to the problem characteristics. 
} } @TechReport{ it:2013-022, author = {Lina Meinecke and Per L{\"o}tstedt}, title = {Stochastic Diffusion Processes on {C}artesian Meshes}, institution = it, department = tdb, year = 2013, number = {2013-022}, month = nov, abstract = {Diffusion of molecules is simulated stochastically by letting them jump between voxels in a Cartesian mesh. The jump coefficients are first derived using finite difference, finite element, and finite volume approximations of the Laplacian on the mesh. An alternative is to let the first exit time for a molecule in random walk in a voxel define the jump coefficient. Such coefficients have the advantage of always being non-negative. These four different ways of obtaining the diffusion propensities are compared theoretically and in numerical experiments.} } @TechReport{ it:2013-021, author = {Bengt Carlsson and Jes{\'u}s Zambrano}, title = {Fault Detection and Isolation of Sensors in Aeration Control Systems -- the Airflow Ratio Method}, institution = it, department = syscon, year = 2013, number = {2013-021}, month = oct, abstract = {In this paper, we consider the problem of detecting sensor faults in the aeration system of an activated sludge process. The purpose is to detect possible faults in the dissolved oxygen sensors. The dissolved oxygen concentration in each aerated zone is assumed to be automatically controlled. As the basis for a fault detection algorithm we propose to use the ratio of air flow rates into different zones. The method is evaluated by using the Benchmark Simulation Model n$^{o}$1 (BSM1) via Monte Carlo simulations. 
Results show that this method gives a good performance in terms of correct and early fault detection and isolation.} } @TechReport{ it:2013-020, author = {Dimitar Lukarski and Tobias Skoglund}, title = {A Priori Power Estimation of Linear Solvers on Multi-Core Processors}, institution = it, department = tdb, year = 2013, number = {2013-020}, month = sep, abstract = {High-performance computing (HPC) centres simulate complex scientific models which provide vital understanding of our world. In the recent years, power efficiency has become a critical aspect in the new HPC facilities because of high energy consumption costs. In this work, we present our study on power consumption of linear solvers on modern multi-core CPUs which are widely used in many scientific applications. We focus on both dense and sparse linear solvers - parallel direct solvers for the dense, and parallel iterative methods for the sparse problems. We provide answers to the questions -- what is the energy efficiency using multi-core parallel algorithms for linear systems and do we save energy using more cores? Furthermore, we propose a methodology for estimating total power consumption. Based on benchmarks which achieve high accuracy we estimate total power usage of the whole solution phase on multi-core CPUs.} } @TechReport{ it:2013-019, author = {Emil Kieri and Gunilla Kreiss and Olof Runborg}, title = {Coupling of {G}aussian Beam and Finite Difference Solvers for Semiclassical {S}chr{\"o}dinger Equations}, institution = it, department = tdb, year = 2013, number = {2013-019}, month = sep, abstract = {In the semiclassical regime, solutions to the time-dependent Schr{\"o}dinger equation are highly oscillatory. The number of grid points required for resolving the oscillations may become very large even for simple model problems, making solution on a grid, e.g., using a finite difference method, intractable. 
Asymptotic methods like Gaussian beams can resolve the oscillations with little effort and yield good approximations when the molecules are heavy and the potential is smooth. However, when the potential has variations on a small length-scale, quantum phenomena become important. Then asymptotic methods are less accurate. The two classes of methods perform well in different parameter regimes. This opens for hybrid methods, using Gaussian beams where we can and finite differences where we have to. We propose a new method for treating the coupling between the finite difference method and Gaussian beams. The new method reduces the needed amount of overlap regions considerably compared to previous methods, which improves the efficiency. We apply the method to scattering problems in one and two dimensions. } } @TechReport{ it:2013-018, author = {Martin Almquist and Ilkka Karasalo and Ken Mattsson}, title = {Atmospheric Sound Propagation over Large-Scale Irregular Terrain}, institution = it, department = tdb, year = 2013, number = {2013-018}, month = sep, abstract = {A benchmark problem on atmospheric sound propagation over irregular terrain has been solved using a stable fourth-order accurate finite difference approximation of a high-fidelity acoustic model. A comparison with the parabolic equation method and ray tracing methods is made. The results show that ray tracing methods can potentially be unreliable in the presence of irregular terrain.} } @TechReport{ it:2013-017, author = {T. S{\"o}derstr{\"o}m and J. Yuz}, title = {Analysing Model Validation Methods for Errors-in-Variables Estimation}, institution = it, department = syscon, year = 2013, number = {2013-017}, month = sep, abstract = {When identifying a dynamic system the model has to be validated as well. For an errors-in-variables situation where both input and output measurements are noise corrupted, this is a nontrivial task, seldom treated in the literature. 
Some different approaches for model validation are introduced and evaluated by theoretical analysis as well as application to simulated data.} } @TechReport{ it:2013-016, author = {Ken Mattsson and Martin Almquist and Mark H. Carpenter}, title = {Optimal Diagonal-Norm {SBP} Operators}, institution = it, department = tdb, year = 2013, number = {2013-016}, month = sep, abstract = {Optimal boundary closures are derived for first derivative, finite difference operators of order $2$, $4$, $6$ and $8$. The closures are based on a diagonal-norm summation-by-parts (SBP) framework, thereby guaranteeing linear stability on piecewise curvilinear multi-block grids and entropy stability for nonlinear equations that support a convex extension. The new closures are developed by enriching conventional approaches with additional boundary closure stencils and non-equidistant grid distributions at the domain boundaries. Greatly improved accuracy is achieved near the boundaries, as compared with traditional diagonal norm operators of the same order. The superior accuracy of the new optimal diagonal-norm SBP operators is demonstrated for linear hyperbolic systems in one dimension and for the nonlinear compressible Euler equations in two dimensions.} } @TechReport{ it:2013-015, author = {Olov Ros{\'e}n and Alexander Medvedev}, title = {Parallel Recursive Bayesian Estimation on Multicore Computational Platforms Using Orthogonal Basis Functions}, institution = it, department = syscon, year = 2013, number = {2013-015}, month = aug, abstract = {A method to solve the recursive Bayesian estimation problem by making use of orthogonal series expansions of the involved probability density functions is presented. The coefficients of the expansion for the posterior density are then calculated recursively via prediction and update equations. 
The method has two main benefits: it provides high estimation accuracy at a relatively low computational cost and it is highly amenable to parallel implementation. An application to a bearings-only tracking problem shows that the proposed method performs with the same accuracy as the particle filter but at a 24 times lower computational cost. A parallel implementation on a shared-memory multicore machine demonstrates that linear speedup in the number of cores is achievable.} } @TechReport{ it:2013-014, author = {{\AA}man Pohjola, Johannes and Johannes Borgstr{\"o}m and Joachim Parrow and Palle Raabjerg and Ioana Rodhe}, title = {Negative Premises in Applied Process Calculi}, institution = it, department = csd, year = 2013, number = {2013-014}, month = jun, abstract = {We explore two applications of negative premises to increase the expressive power of psi-calculi: reliable broadcasts and priorities. Together, these can be used to model discrete time, which we illustrate with an example from automotive applications. The negative premises can be encoded by a two-level structural operational semantics without negative premises; we use this fact to prove the standard congruence and structural laws of bisimulation with Nominal Isabelle.} } @TechReport{ it:2013-013, author = {Jens Berg and Jan Nordstr{\"o}m}, title = {Duality Based Boundary Conditions and Dual Consistent Finite Difference Discretizations of the {N}avier-{S}tokes and {E}uler Equations}, institution = it, department = tdb, year = 2013, number = {2013-013}, month = may, abstract = {In this paper we derive new farfield boundary conditions for the time-dependent Navier-Stokes and Euler equations in two space dimensions. The new boundary conditions are derived by simultaneously considering well-posedess of both the primal and dual problems. 
We moreover require that the boundary conditions for the primal and dual Navier-Stokes equations converge to well-posed boundary conditions for the primal and dual Euler equations. We perform computations with a high-order finite difference scheme on summation-by-parts form with the new boundary conditions imposed weakly by the simultaneous approximation term. We prove that the scheme is both energy stable and dual consistent and show numerically that both linear and non-linear integral functionals become superconvergent.} } @TechReport{ it:2013-012, author = {Mikael Erlandsson and Anders Jansson}, title = {General Domain Properties in Vehicle Operation: A Comparison between Trains and High-Speed Ferries}, institution = it, department = vi2, year = 2013, number = {2013-012}, month = may, abstract = {This paper focus on decision making in the domain of vehicle driving, and the goal is to find out what train-drivers have in common with officers of high-speed ferries in terms of general domain properties. Such properties may be important to identify because they can say something about what constraints these rather diverse work domains may have in common. Consequently, design ideas within one domain can be transferred to another, bearing in mind that tasks, strategies as well as domain-specific knowledge and worker competencies, still have to be introduced in the final design of any artefact. A new information acquisition method called collegial verbalisation is used to study vehicle drivers of trains and high speed ferries. Central properties of the vehicle operators' work are identified, and a comparison is made between the two very different types of vehicles. Three specific topics from the results are discussed in detail, and some conclusions are made about the generality of them with respect to other domains in transportation. 
Two of the topics, operators' spatial division and temporal perspective, are also discussed in relation to the concept of situation awareness.} } @TechReport{ it:2013-011, author = {Daniel Elfverson and Axel M{\aa}lqvist}, title = {Discontinuous {G}alerkin Multiscale Methods for Convection Dominated Problems}, institution = it, department = tdb, year = 2013, number = {2013-011}, month = may, abstract = {We propose an extension of the discontinuous Galerkin multiscale method, presented in [11], to convection dominated problems with rough, heterogeneous, and highly varying coefficients. The properties of the multiscale method and the discontinuous Galerkin method allows us to better cope with multiscale features as well as boundary layers in the solution. In the proposed method the trail and test spaces are spanned by a corrected basis calculated on localized patches of size $\mathcal{O}(H\log(H^{-1}))$, where $H$ is the mesh size. We prove convergence rates independent of the variation in the coefficients and present numerical experiments which verify the analytical findings.} } @TechReport{ it:2013-010, author = {Mark B. Flegg and Stefan Hellander and Radek Erban}, title = {Convergence of Methods for Coupling of Microscopic and Mesoscopic Reaction-Diffusion Simulations}, institution = it, department = tdb, year = 2013, number = {2013-010}, month = apr, abstract = {In this paper, three multiscale methods for coupling of mesoscopic (compartment-based) and microscopic (molecular-based) stochastic reaction-diffusion simulations are investigated. Two of the three methods that will be discussed in detail have been previously reported in the literature; the two-regime method (TRM) and the compartment-placement method (CPM). The third method that is introduced and analysed in this paper is the ghost cell method (GCM). Presented is a comparison of sources of error. 
The convergent properties of this error are studied as the time step $\Delta t$ (for updating the molecular-based part of the model) approaches zero. It is found that the error behaviour depends on another fundamental computational parameter $h$, the compartment size in the mesoscopic part of the model. Two important limiting cases, which appear in applications, are considered: \par $\,$(i) $\Delta t \to 0$ and $h$ is fixed; \par (ii) $\Delta t \to 0$ and $h \rightarrow 0$ such that $\sqrt{\Delta t}/h$ is fixed. \\ The error for previously developed approaches (the TRM and CPM) converges to zero only in the limiting case (ii), but not in case (i). It is shown that the error of the GCM converges in the limiting case (i). Thus the GCM is superior to previous coupling techniques if the mesoscopic description is much coarser than the microscopic part of the model.} } @TechReport{ it:2013-009, author = {Stefan Hellander}, title = {Single Molecule Simulations in Complex Geometries with Embedded Dynamic One-Dimensional Structures}, institution = it, department = tdb, year = 2013, number = {2013-009}, month = apr, abstract = {Stochastic models of reaction-diffusion systems are important for the study of bio- chemical reaction networks where species are present in low copy numbers or if reac- tions are highly diffusion limited. In living cells many such systems include reactions and transport on one-dimensional structures, such as DNA and microtubules. The cytoskeleton is a dynamic structure where individual fibers move, grow and shrink. In this paper we present a simulation algorithm that combines single molecule simula- tions in three-dimensional space with single molecule simulations on one-dimensional structures of arbitrary shape. Molecules diffuse and react with each other in space, they associate to and dissociate from one-dimensional structures as well as diffuse and react with each other on the one-dimensional structure. 
A general curve embedded in space can be approximated by a piecewise linear curve to arbitrary accuracy.
(ii) The run-time complexity of PLRS is polynomial (quadratic in the number of tasks), which is much more efficient than the pseudo-polynomial run-time complexity of LB.} } @TechReport{ it:2013-007, author = {Emil Kieri}, title = {Accelerated Convergence for {S}chr{\"o}dinger Equations with Non-Smooth Potentials}, institution = it, department = tdb, year = 2013, number = {2013-007}, month = apr, abstract = {When numerically solving the time-dependent Schr{\"o}dinger equation for the electrons in an atom or molecule, the Coulomb singularity poses a challenge. The solution will have limited regularity, and high-order spatial discretisations, which are much favoured in the chemical physics community, are not performing to their full potential. By exploiting knowledge about the jumps in the derivatives of the solution we construct a correction, and show how this improves the convergence rate of Fourier collocation from second to fourth order. This allows for a substantial reduction in the number of grid points. The new method is applied to the higher harmonic generation from atomic hydrogen.} } @TechReport{ it:2013-006, author = {Torbj{\"o}rn Wigren and Johan Schoukens}, title = {Data for Benchmarking in Nonlinear System Identification}, institution = it, department = syscon, year = 2013, number = {2013-006}, month = mar, note = {The data can be downloaded from \url{http://www.it.uu.se/research/publications/reports/2013-006/SNLA80mVZipped.zip}.} , abstract = {System identification is a fundamentally experimental field of science in that it deals with modeling of system dynamics using measured data. Despite this fact many algorithms and theoretical results are only tested with simulations at the time of publication. One reason for this may be a lack of easily available live data. This paper therefore presents three sets of data, suitable for development, testing and benchmarking of system identification algorithms for nonlinear systems. 
The data sets are collected from laboratory processes that can be described by block-oriented dynamic models, and by more general nonlinear difference and differential equation models.
The surface is allowed to cut through the background grid in an arbitrary fashion. To ensure well posedness of the resulting algebraic systems of equations, independent of the position of the surface in the background grid, we add a consistent stabilization term. We prove error estimates and present confirming numerical results.} } @TechReport{ it:2013-003, author = {Rebecka Janols and Bengt G{\"o}ransson and Bengt Sandblad}, title = {Slutrapport f{\"o}r Projektet {IVAN} 2008-2011}, institution = it, department = vi2, year = 2013, number = {2013-003}, month = mar, note = {In Swedish.}, abstract = {Projektet IVAN (Informationssystem i v{\aa}rden Anv{\"a}ndbarhet och nytta) var ett samarbetsprojekt mellan Landstinget Uppsala L{\"a}n (LUL) och forskare vid institutionen f{\"o}r informationsteknologi, avdelningen m{\"a}nniska datorinteraktion vid Uppsala universitet (UU). Projektet startade med en f{\"o}rstudie p{\aa} Akademiska sjukhuset h{\"o}sten 2008 och har mellan jan 2010- dec 2011 fungerat som ett Landstingsprojekt. Det {\"o}vergripande m{\aa}let med IVAN var att skapa en b{\"a}ttre anv{\"a}ndning av IT i v{\aa}rden lokalt p{\aa} LUL:s f{\"o}rvaltningar (Akademiska sjukhuset, Prim{\"a}rv{\aa}rden, Hj{\"a}lpmedel och habilitering samt lasarettet i Enk{\"o}ping).I den h{\"a}r rapporten presenteras slutsatser fr{\aa}n 2.5 {\aa}rs forskarstudier.} } @TechReport{ it:2013-002, author = {Rebecka Janols and Bengt Sandblad and Bengt G{\"o}ransson and Thomas Lind}, title = {Anv{\"a}ndarnas synpunkter p{\aa} {IT}-l{\"o}sningar i v{\aa}rden - {S}ammanfattande erfarenheter fr{\aa}n utv{\"a}rderingar vid {L}andstinget i {U}ppsala l{\"a}n (2008-2012)}, institution = it, department = vi2, year = 2013, number = {2013-002}, month = mar, note = {In Swedish. 
Updated title 2013-03-16.}, abstract = {Det st{\"a}ndigt p{\aa}g{\aa}ende IT-f{\"o}r{\"a}ndringsarbetet inom v{\aa}rden har potentialen att bidra till {\"o}kad v{\aa}rdkvalitet, effektivitet, patients{\"a}kerhet, patientservice och en f{\"o}rb{\"a}ttrad arbetsmilj{\"o}. Om man lyckas med detta {\"a}r IT-investeringar nyttiga f{\"o}r v{\aa}rden, patienterna och landstinget. Idag finns emellertid en d{\aa}lig kunskap om vilken nyttan {\"a}r och det saknas modeller f{\"o}r att ber{\"a}kna den. All erfarenhet fr{\aa}n det hittills genomf{\"o}rda arbetet, liksom fr{\aa}n mycket annan tidigare forskning, pekar p{\aa} att det finns stora problem med utveckling och inf{\"o}rande av IT i v{\aa}rden. Problemen best{\aa}r dels i att man inte f{\"o}rm{\aa}r utnyttja den nyttopotential som finns, dels att s{\aa}v{\"a}l IT-system som f{\"o}r{\"a}ndringsprocesserna inte h{\aa}ller nog h{\"o}g kvalitet, vilket resulterar i negativa reaktioner och effekter hos anv{\"a}ndarna, dvs. hos v{\aa}rdpersonalen. Denna rapport presenterar en sammanfattning av anv{\"a}ndarnas synpunkter p{\aa} IT l{\"o}sningar i v{\aa}rden. Inneh{\aa}llet i rapporten {\"a}r grundat i b{\aa}de kvalitativa och kvantitativa studier genomf{\"o}rt inom ramen for projektet IVAN. Projektet IVAN (Informationssystem i v{\aa}rden Anv{\"a}ndbarhet och Nytta) {\"a}r ett samarbetsprojekt mellan Landstinget Uppsala L{\"a}n (LUL) och forskare vid avdelningen for visuell information och interaktion, Institutionen f{\"o}r informationsteknologi vid Uppsala universitet. Projektet startade med en 6 m{\aa}naders f{\"o}rstudie p{\aa} Akademiska sjukhuset h{\"o}sten 2008 och har mellan jan 2010- dec 2011 fungerat som ett Landstingsprojekt. Projektet var ett IVAN har till stor del fokuserat p{\aa} kartl{\"a}ggningar och analyser av de system, processer och organisation som finns runt dagens IT-system. Bilden av de problem som finns idag, s{\"a}rskilt n{\"a}r det g{\"a}ller v{\aa}rdpersonalens upplevda situation, har klarnat. 
M{\aa}nga behov av f{\"o}r{\"a}ndringar och f{\"o}rb{\"a}ttringar har framkommit. } } @TechReport{ it:2013-001, author = {Rebecka Janols and Erik Bor{\"a}lv}, title = {{I}nformationssystem i {V}{\aa}rden ~ {A}nv{\"a}ndbarhet och {N}ytta ({IVAN}). F{\"o}rstudierapport 2008.}, institution = it, department = vi2, year = 2013, number = {2013-001}, month = jan, note = {In Swedish. Updated title 2013-03-16.}, abstract = {Under h{\"o}sten 2008 har ett samarbetsprojekt mellan Akademiska sjukhuset och Uppsala universitet p{\aa}b{\"o}rjats. Det har handlat om att fram mer anv{\"a}ndbara IT-st{\"o}d d{\"a}r m{\aa}let {\"a}r att IT-anv{\"a}ndningen ska bidra till ett effektivt och h{\aa}llbart arbete. Under h{\"o}sten har vi genomf{\"o}rt aktiviteter i form av intervjuer, observationer, enk{\"a}ter samt medverkat i olika personalaktiviteter p{\aa} tre enheter p{\aa} Akademiska sjukhuset: ortopedmottagningen, 30e och 96c. Slutsatserna presenterar vi i form av fem {\"o}vergripande rekommendationer inf{\"o}r det fortsatta arbetet. De fem rekommendationerna {\"a}r: 1) Upps{\"o}kande supportverksamhet, 2) Jourhavande jurist, 3) Mandat, 4) Utv{\"a}rdera samt 5) utbildning.} } @TechReport{ it:2012-036, author = {Pavol Bauer and Brian Drawert and Stefan Engblom and Andreas Hellander}, title = {URDME v. 1.2: User's manual}, institution = it, department = tdb, year = 2012, number = {2012-036}, month = dec, note = {Also available at \url{http://arxiv.org/abs/0902.2912}}, abstract = {We have developed URDME, a general software for simulation of stochastic reaction-diffusion processes on unstructured meshes. This allows for a more flexible handling of complicated geometries and curved boundaries compared to simulations on structured, cartesian meshes. The underlying algorithm is the next subvolume method, extended to unstructured meshes by obtaining jump coefficients from a finite element formulation of the corresponding macroscopic equation. 
This manual describes version 1.2 of the software. URDME 1.2 includes support for Comsol Multiphysics 4.1, 4.2, 4.3 as well as the previous version 3.5a. Additionally, support for basic SBML has been added along with the possibility to compile in stand-alone mode.} } @TechReport{ it:2012-035, author = {Per Pettersson and Gianluca Iaccarino and Jan Nordstr{\"o}m}, title = {An Intrusive Hybrid Method for Discontinuous Two-Phase Flow under Uncertainty}, institution = it, department = tdb, year = 2012, number = {2012-035}, month = dec, abstract = {An intrusive stochastic projection method for two-phase time-dependent flow subject to uncertainty is presented. Numerical experiments are carried out assuming uncertainty in the interface location, but the framework generalizes to uncertainty with known distribution in other input data. Uncertainty is represented through a truncated multiwavelet expansion. We assume that the discontinuous features of the solution are restricted to subdomains in the computational domain and use a high-order method for the smooth regions coupled weakly through interfaces with a robust shock capturing method for the non-smooth regions. The discretization of the non-smooth region is based on a generalization of the HLL flux, and have many properties in common with its deterministic counterpart. It is simple and robust, and captures the statistics of the shock. The discretization of the smooth region is carried out with high-order finite-difference operators satisfying a summation-by-parts property.} } @TechReport{ it:2012-034, author = {Siyang Wang and Johan Elf and Stefan Hellander and Per L{\"o}tstedt}, title = {Stochastic Reaction-Diffusion Processes with Embedded Lower Dimensional Structures}, institution = it, department = tdb, year = 2012, number = {2012-034}, month = dec, abstract = {Small copy numbers of many molecular species in biological cells require stochastic models of the chemical reactions between the molecules and their motion. 
Important reactions often take place on one dimensional structures embedded in three dimensions with molecules migrating between the dimensions. Examples of polymer structures in cells are DNA, microtubules, and actin filaments. An algorithm for simulation of such systems is developed at a mesoscopic level of approximation. An arbitrarily shaped polymer is coupled to a background Cartesian mesh in three dimensions. The realization of the system is made with a stochastic simulation algorithm in the spirit of Gillespie. The method is applied to model problems for verification and two more detailed models of transcription factor interaction with the DNA.} } @TechReport{ it:2012-033, author = {Per Pettersson and Gianluca Iaccarino and Jan Nordstr{\"o}m}, title = {A Stochastic {G}alerkin Method for the {E}uler Equations with {R}oe Variable Transformation}, institution = it, department = tdb, year = 2012, number = {2012-033}, month = nov, note = {This is a complete rewrite of report nr 2012-021 with new results. A more general framework for the representation of uncertainty is used. All figures have been replaced and more numerical results have been added (methods of manufactured solutions, convergence in space and the stochastic dimension for subsonic and supersonic flow).}, abstract = {The Euler equations subject to uncertainty in the initial and boundary conditions are investigated via the stochastic Galerkin approach. We present a new fully intrusive method based on a variable transformation of the continuous equations. Roe variables are employed to get quadratic dependence in the flux function and a well-defined Roe average matrix that can be determined without matrix inversion. 
In previous formulations based on generalized polynomial chaos expansion of the physical variables, the need to introduce stochastic expansions of inverse quantities, or square-roots of stochastic quantities of interest, adds to the number of possible different ways to approximate the original stochastic problem. We present a method where the square roots occur in the choice of variables and no auxiliary quantities are needed, resulting in an unambiguous problem formulation. The Roe formulation saves computational cost compared to the formulation based on expansion of conservative variables. Moreover, the Roe formulation is more robust and can handle cases of supersonic flow, for which the conservative variable formulation fails to produce a bounded solution. We use a multi-wavelet basis that can be chosen to include a large number of resolution levels to handle more extreme cases (e.g. strong discontinuities) in a robust way. For smooth cases, the order of the polynomial representation can be increased for increased accuracy.} } @TechReport{ it:2012-032, author = {Sofia Eriksson and Jan Nordstr{\"o}m}, title = {Exact Non-Reflecting Boundary Conditions Revisited: Well-Posedness and Stability}, institution = it, department = tdb, year = 2012, number = {2012-032}, month = oct, abstract = {Exact non-reflecting boundary conditions for an incompletely parabolic system have been studied. It is shown that well-posedness is a fundamental property of the non-reflecting boundary conditions. By using summation by parts operators for the numerical approximation and a weak boundary implementation, energy stability follows automatically. The stability in combination with the high order accuracy results in a reliable, efficient and accurate method. The theory is supported by numerical simulations. 
} } @TechReport{ it:2012-031, author = {Parosh Aziz Abdulla and Mohamed Faouzi Atig and Jonathan Cederberg}, title = {Timed Lossy Channel Systems}, institution = it, department = docs, year = 2012, number = {2012-031}, month = oct, abstract = {Lossy channel systems are a classical model with applications ranging from the modeling of communication protocols to programs running on weak memory models. All existing work assume that messages traveling inside the channels are picked from a finite alphabet. In this paper, we extend the model by assuming that each message is equipped with a clock representing the age of the message, thus obtaining the model of \emph{Timed Lossy Channel Systems (TLCS)}. The main contribution of the paper is to show that the control state reachability problem is decidable for TLCS.} } @TechReport{ it:2012-030, author = {David Ekl{\"o}v and Nikos Nikoleris and Erik Hagersten}, title = {A Profiling Method for Analyzing Scalability Bottlenecks on Multicores}, institution = it, department = docs, year = 2012, number = {2012-030}, month = oct, abstract = {A key goodness metric of multi-threaded programs is how their execution times scale when increasing the number of threads. However, there are several bottlenecks that can limit the scalability of a multi-threaded program, e.g., contention for shared cache capacity and off-chip memory bandwidth; and synchronization overheads. In order to improve the scalability of a multi-threaded program, it is vital to be able to quantify how the program is impacted by these scalability bottlenecks. We present a software-only profiling method for obtaining {\it speedup stacks}. A speedup stack reports how much each scalability bottleneck limits the scalability of a multi-threaded program. It thereby quantifies how much its scalability can be improved by eliminating a given bottleneck. 
A software developer can use this information to determine what optimizations are most likely to improve scalability, while a computer architect can use it to analyze the resource demands of emerging workloads. The proposed method profiles the program on real commodity multi-cores (i.e., no simulations required) using existing performance counters. Consequently, the obtained speedup stacks accurately account for all idiosyncrasies of the machine on which the program is profiled. While the main contribution of this paper is the profiling method to obtain speedup stacks, we present several examples of how speedup stacks can be used to analyze the resource requirements of multi-threaded programs. Furthermore, we discuss how their scalability can be improved by both software developers and computer architects.} } @TechReport{ it:2012-029, author = {David Ekl{\"o}v and Nikos Nikoleris and David Black-Schaffer and Erik Hagersten}, title = {Quantitative Characterization of Memory Contention}, institution = it, department = docs, year = 2012, number = {2012-029}, month = oct, abstract = {On multicore processors, co-executing applications compete for shared resources, such as cache capacity and memory bandwidth. This leads to suboptimal resource allocation and can cause substantial performance loss, which makes it important to effectively manage these shared resources. This, however, requires insights into how the applications are impacted by such resource sharing. While there are several methods to analyze the performance impact of cache contention, less attention has been paid to general, quantitative methods for analyzing the impact of contention for memory bandwidth. To this end we introduce the \textit{Bandwidth Bandit}, a general, quantitative, profiling method for analyzing the performance impact of contention for memory bandwidth on multicore machines. The profiling data captured by the Bandwidth Bandit is presented in a {\it bandwidth graph}. 
This graph accurately captures the measured application's performance as a function of its available memory bandwidth, and enables us to determine how much the application suffers when its available bandwidth is reduced. To demonstrate the value of this data, we present a case study in which we use the bandwidth graph to analyze the performance impact of memory contention when co-running multiple instances of single threaded application.} } @TechReport{ it:2012-028, author = {Per Pettersson and Alireza Doostan and Jan Nordstr{\"o}m}, title = {On Stability and Monotonicity Requirements of Discretized Stochastic Conservation Laws with Random Viscosity}, institution = it, department = tdb, year = 2012, number = {2012-028}, month = sep, abstract = {The stochastic Galerkin and collocation methods are used to solve an advection-diffusion equation with uncertain and spatially varying viscosity. We investigate well-posedness, monotonicity and stability for the extended system resulting from the Galerkin projection of the advection-diffusion equation onto the stochastic basis functions. High-order summation-by-parts operators and weak imposition of boundary conditions are used to prove stability of the semi-discrete system. It is essential that the eigenvalues of the resulting viscosity matrix of the stochastic Galerkin system are positive and we investigate conditions for this to hold. When the viscosity matrix is diagonalizable, stochastic Galerkin and stochastic collocation are similar in terms of computational cost, and for some cases the accuracy is higher for stochastic Galerkin provided that monotonicity requirements are met. 
We also investigate the total spatial operator of the semi-discretized system and its impact on the convergence to steady-state.} } @TechReport{ it:2012-027, author = {Carl Nettelblad}, title = {Breakdown of Methods for Phasing and Imputation in the Presence of Double Genotype Sharing}, institution = it, department = tdb, year = 2012, number = {2012-027}, month = sep, abstract = {In genome-wide association studies, results have been improved through imputation of a denser marker set based on reference haplotypes and phasing of the genotype data. To better handle very large sets of reference haplotypes, pre-phasing with only study individuals has been suggested. We present a possible problem which is aggravated when pre-phasing strategies are used, and suggest a modification avoiding these issues with application to the MaCH tool. We evaluate the effectiveness of our remedy to a subset of Hapmap data, comparing the original version of MaCH and our modified approach. Improvements are demonstrated on the original data (phase switch error rate decresasing by 10\%), but the differences are more pronounced in cases where the data is augmented to represent the presence of closely related individuals, especially when siblings are present (30\% reduction in switch error rate in the presence of children, 47\% reduction in the presence of siblings). When introducing siblings, the switch error rate in results from the unmodified version of MaCH increases significantly compared to the original data. The main conclusions of this investigation is that existing statistical methods for phasing and imputation of unrelated individuals might give subpar quality results if a subset of study individuals nonetheless are related. As the populations collected for general genome-wide association studies grow in size, including relatives might become more common. 
If a general GWAS framework for unrelated individuals would be employed on datasets where sub-populations originally collected as familial case-control sets are included, caution should also be taken regarding the quality of haplotypes. Our modification to MaCH is available on request and straightforward to implement. We hope that this mode, if found to be of use, could be integrated as an option in future standard distributions of MaCH.} } @TechReport{ it:2012-026, author = {Carl Nettelblad}, title = {Inferring Haplotypes and Parental Genotypes in Larger Full Sib-Ships and Other Pedigrees with Missing or Erroneous Genotype Data}, institution = it, department = tdb, year = 2012, number = {2012-026}, month = sep, abstract = {\textbf{Background:} In many contexts, pedigrees for individuals are known even though not all individuals have been fully genotyped. In one extreme case, the genotypes for a set of full siblings are known, with no knowledge of parental genotypes. We propose a method for inferring phased haplotypes and genotypes for all individuals, even those with missing data, in such pedigrees, allowing a multitude of classic and recent methods for linkage and genome analysis to be used more efficiently. \textbf{Results:} By artificially removing the founder generation genotype data from a well-studied simulated dataset, the quality of reconstructed genotypes in that generation can be verified. For the full structure of repeated matings with $15$ offspring per mating, $10$ dams per sire, $99.89\%$ of all founder markers were phased correctly, given only the unphased genotypes for offspring. The accuracy was reduced only slightly, to $99.51\%$, when introducing a $2\%$ error rate in offspring genotypes. When reduced to only $5$ full-sib offspring in a single sire-dam mating, the corresponding percentage is $92.62\%$, which compares favorably with $89.28\%$ from the leading Merlin package. 
Furthermore, Merlin is unable to handle more than approximately $10$ sibs, as the number of states tracked rises exponentially with family size, while our approach has no such limit and handles $150$ half-sibs with ease in our experiments. \textbf{Conclusions:} Our method is able to reconstruct genotypes for parents when genotype data is only available for offspring individuals, as well as haplotypes for all individuals. Compared to the Merlin package, we can handle larger pedigrees and produce superior results, mainly due to the fact that Merlin uses the Viterbi algorithm on the state space to infer the genotype sequence. Tracking of haplotype and allele origin can be used in any application where the marker set does not directly influence genotype variation influencing traits. Inference of genotypes can also reduce the effects of genotyping errors and missing data. The \texttt{cnF2freq} codebase implementing our approach is available under a BSD-style license.} } @TechReport{ it:2012-025, author = {Jens Berg and Jan Nordstr{\"o}m}, title = {On the Impact of Boundary Conditions on Dual Consistent Finite Difference Discretizations}, institution = it, department = tdb, year = 2012, number = {2012-025}, month = sep, abstract = {In this paper we derive well-posed boundary conditions for a linear incompletely parabolic system of equations, which can be viewed as a model problem for the compressible Navier-Stokes equations. We show a general procedure for the construction of the boundary conditions such that both the primal and dual equations are well-posed. The form of the boundary conditions is chosen such that reduction to first order form with its complications can be avoided. The primal equation is discretized using finite difference operators on summation-by-parts form with weak boundary conditions. It is shown that the discretization can be made energy stable, and that energy stability is sufficient for dual consistency. 
Since reduction to first order form can be avoided, the discretization is significantly simpler compared to a discretization using Dirichlet boundary conditions. We compare the new boundary conditions with standard Dirichlet boundary conditions in terms of rate of convergence, errors and discrete spectra. It is shown that the scheme with the new boundary conditions is not only far simpler, but also has smaller errors, error bounded properties, and highly optimizable eigenvalues, while maintaining all desirable properties of a dual consistent discretization.} } @TechReport{ it:2012-024, author = {Katharina Kormann and Elisabeth Larsson}, title = {An {RBF}-{G}alerkin Approach to the Time-Dependent {S}chr{\"o}dinger Equation}, institution = it, department = tdb, year = 2012, number = {2012-024}, month = sep, abstract = {In this article, we consider the discretization of the time-dependent Schr{\"o}dinger equation using radial basis functions (RBF). We formulate the discretized problem over an unbounded domain without imposing explicit boundary conditions. Since we can show that time-stability of the discretization is not guaranteed for an RBF-collocation method, we propose to employ a Galerkin ansatz instead. For Gaussians, it is shown that exponential convergence is obtained up to a point where a systematic error from the domain where no basis functions are centered takes over. The choice of the shape parameter and of the resolved region is studied numerically. 
Compared to the Fourier method with periodic boundary conditions, the basis functions can be centered in a smaller domain which gives increased accuracy with the same number of points.} } @TechReport{ it:2012-023, author = {Katharina Kormann}, title = {A Time-Space Adaptive Method for the {S}chr{\"o}dinger Equation}, institution = it, department = tdb, year = 2012, number = {2012-023}, month = aug, note = {Updated 2012-09-12 (typos fixed).}, abstract = {In this paper, we present a discretization of the time-dependent Schr{\"o}dinger equation based on a Magnus-Lanczos time integrator and high-order Gauss-Lobatto finite elements in space. A truncated Galerkin orthogonality is used to obtain duality-based a posteriori error estimates that address the temporal and the spatial error separately. Based on this theory, a space-time adaptive solver for the Schr{\"o}dinger equation is devised. An efficient matrix-free implementation of the differential operator, suited for spectral elements, is used to enable computations for realistic configurations. We demonstrate the performance of the algorithm for the example of matter-field interaction.} } @TechReport{ it:2012-022, author = {Olof Rensfelt and Frederik Hermans and Thiemo Voigt and Edith Ngai and Lars-{\AA}ke Nord{\'e}n and Per Gunningberg}, title = {{SoNIC}: Classifying and Surviving Interference in 802.15.4-based Sensor Networks}, institution = it, department = docs, year = 2012, number = {2012-022}, month = aug, abstract = {Sensor networks that use 802.15.4 in the 2.45~GHz ISM band are prone to radio interference from devices such as microwave ovens, WiFi devices, and Bluetooth devices. Interference can cause packet loss and thus reduces network performance and lifetime. Online detection of interference in sensor networks is challenging, because nodes cannot decode foreign transmissions due to incompatibility between 802.15.4 and other technologies. 
We present SoNIC, a system that enables sensor networks to detect and classify cross-technology interference. At the core of SoNIC lies an interference classification approach that assigns individual corrupted 802.15.4 packets to different interference classes. A voting algorithm fuses the classification results to detect the presence of interferers. The output of the voting can be used to select a mitigation strategy. In contrast to other solutions, SoNIC does not require active spectrum scanning or specialized hardware. It runs on ordinary TelosB sensor nodes using Contiki. This technical report describes the core idea of SoNIC, selected system aspects of it (including three mitigation strategies), as well as initial experiments in an office environment.} } @TechReport{ it:2012-021, author = {Per Pettersson and Gianluca Iaccarino and Jan Nordstr{\"o}m}, title = {A {R}oe Variable Based Chaos Method for the {E}uler Equations under Uncertainty}, institution = it, department = tdb, year = 2012, number = {2012-021}, month = aug, abstract = {The Euler equations subject to uncertainty in the input parameters are investigated via the stochastic Galerkin approach. We present a new fully intrusive method based on a variable transformation of the continuous equations. Roe variables are employed to get quadratic dependence in the flux function and a well-defined Roe average matrix that can be determined without matrix inversion. In previous formulations based on generalized chaos expansion of the physical variables, the need to introduce stochastic expansions of inverse quantities, or square-roots of stochastic quantities of interest, adds to the number of possible different ways to approximate the original stochastic problem. We present a method where no auxiliary quantities are needed, resulting in an unambiguous problem formulation. The Roe formulation saves computational cost compared to the formulation based on expansion of conservative variables. 
Moreover, the Roe formulation is more robust and can handle cases of supersonic flow, for which the conservative variable formulation leads to instability. For more extreme cases, where the global Legendre polynomials poorly approximate discontinuities in stochastic space, we use the localized Haar wavelet basis.}, note = {A complete rewrite with new results appears as report nr 2012-033.} } @TechReport{ it:2012-020, author = {Elisabeth Larsson and Erik Lehto and Alfa Heryudono and Bengt Fornberg}, title = {Stable Computation of Differentiation Matrices and Scattered Node Stencils Based on {G}aussian Radial Basis Functions}, institution = it, department = tdb, year = 2012, number = {2012-020}, month = aug, abstract = {Radial basis function (RBF) approximation has the potential to provide spectrally accurate function approximations for data given at scattered node locations. For smooth solutions, the best accuracy for a given number of node points is typically achieved when the basis functions are scaled to be nearly flat. This also results in nearly linearly dependent basis functions and severe ill-conditioning of the interpolation matrices. Fornberg, Larsson, and Flyer recently developed the RBF-QR method which provides a numerically stable approach to interpolation with flat and nearly flat Gaussian RBFs. In this work, we consider how to extend this method to the task of computing differentiation matrices and stencil weights in order to solve partial differential equations. The expressions for first and second order derivative operators as well as hyperviscosity operators are established, numerical issues such as how to deal with non-unisolvency are resolved, and the accuracy and computational efficiency of the method is tested numerically. 
The results indicate that using the RBF-QR approach for solving PDE problems can be very competitive compared with using the ill-conditioned direct solution approach or using variable precision arithmetic to overcome the conditioning issue.} } @TechReport{ it:2012-019, author = {Owe Axelsson and Xin He and Maya Neytcheva}, title = {Numerical Solution of the Time-Dependent {N}avier-{S}tokes Equation for Variable Density--Variable Viscosity}, institution = it, department = tdb, year = 2012, number = {2012-019}, month = aug, abstract = {We consider methods for numerical simulations of variable density incompressible fluids, modelled by the Navier-Stokes equations. Variable density problems arise, for instance, in interfaces between fluids of different densities in multiphase flows such as appear in porous media problems. It is shown that by solving the Navier-Stokes equation for the momentum variable instead of the velocity, the corresponding saddle point problem, which arises at each time step, becomes automatically regularized, enabling elimination of the pressure variable and leading to a, for the iterative solution, efficient preconditioning of the arising block matrix. We present also stability bounds and a second order operator splitting method. The theory is illustrated by numerical experiments. For reasons of comparison we also include test results for a method, based on coupling of the Navier-Stokes equations with a phase-field model.} } @TechReport{ it:2012-018, author = {Bengt Fornberg and Erik Lehto and Collin Powell}, title = {Stable Calculation of {G}aussian-based {RBF-FD} Stencils}, institution = it, department = tdb, year = 2012, number = {2012-018}, month = aug, abstract = {Traditional finite difference (FD) methods are designed to be exact for low degree polynomials. They can be highly effective on Cartesian-type grids, but may fail for unstructured node layouts. 
Radial basis function-generated finite difference (RBF-FD) methods overcome this problem and, as a result, provide a much improved geometric flexibility. The calculation of RBF-FD weights involves a shape parameter $\varepsilon$. Small values of $\varepsilon$\ (corresponding to near-flat RBFs) often lead to particularly accurate RBF-FD formulas. However, the most straightforward way to calculate the weights (RBF-Direct) then becomes numerically highly ill-conditioned. In contrast, the present algorithm remains numerically stable all the way into the $\varepsilon\rightarrow0$ limit. Like the RBF-QR algorithm, it uses the idea of finding a numerically well-conditioned basis function set in the same function space as is spanned by the ill-conditioned near-flat original Gaussian RBFs. By exploiting some properties of the incomplete gamma function, it transpires that the change of basis can be achieved without dealing with any infinite expansions. Its strengths and weaknesses compared to the Contour-Pad{\'e}, RBF-RA,\ and RBF-QR algorithms are discussed.} } @TechReport{ it:2012-017, author = {Xin He and Marcus Holm and Maya Neytcheva}, title = {Efficiently Parallel Implementation of the Inverse {S}herman-{M}orrison Algorithm}, institution = it, department = tdb, year = 2012, number = {2012-017}, month = aug, abstract = {We contribute two parallel strategies to compute the exact and approximate inverse of a dense matrix, based on the so-called inverse Sherman-Morrison algorithm and demonstrate their efficiencies on multicore CPU and GPU-equipped computers. Our methods are shown to be much better than a common matrix inverse method, yielding up to 12 times faster performance. 
A comparison of the performance of the CPU and GPU versions is made and analyzed with the aid of a performance model.} } @TechReport{ it:2012-016, author = {Xin He and Maya Neytcheva}, title = {On Preconditioning Incompressible Non-{N}ewtonian Flow Problems}, institution = it, department = tdb, year = 2012, number = {2012-016}, month = aug, abstract = {This paper concerns preconditioned iterative solution methods for solving incompressible non-Newtonian Navier-Stokes equations as arising in regularized Bingham models. Stable finite element discretization applied to the linearized equations results in linear systems of saddle point form. In order to circumvent the difficulties of efficiently and cheaply preconditioning the Schur complement of the system matrix, in this paper the augmented Lagrangian (AL) technique is used to algebraically transform the original system into an equivalent one, which does not change the solution and that is the linear system we intend to solve by some preconditioned iterative method. For the transformed system matrix a lower block-triangular preconditioner is proposed. The crucial point in the AL technique is how to efficiently solve the modified pivot block involving the velocity. In this paper an approximation of the modified pivot block is proposed and an algebraic multi-grid technique is applied for this approximation. Numerical experiments show that the AL preconditioner combined with the algebraic multi-grid solver is quite efficient and robust with respect to the variation of the mesh size and the regularized parameter of the Bingham model.} } @TechReport{ it:2012-015, author = {Josefin Ahlkrona and Nina Kirchner and Per L{\"o}tstedt}, title = {A Numerical Study of the Validity of Shallow Ice Approximations}, institution = it, department = tdb, year = 2012, number = {2012-015}, month = jul, abstract = {Improving numerical ice sheet models is a very active field of research. 
In part, this is because ice sheet modelling has gained societal relevance in the context of predictions of future sea level rise. Ice sheet modelling is however also a challenging mathematical and computational subject. Since the exact equations governing ice dynamics, the full Stokes equations, are computationally expensive to solve, approximations are crucially needed for many problems. Shallow ice approximations are a family of approximations derived by asymptotic expansion of the exact equations in terms of the aspect ratio, $\epsilon$. Retaining only the zeroth order terms in this expansion yields the by far most frequently used approximation; the Shallow Ice Approximation (SIA). Including terms up to second order yields the Second Order Shallow Ice Approximation (SOSIA), which is a so-called higher order model. Here, we study the validity and accuracy of shallow ice approximations beyond previous analyses of the SIA. We perform a detailed analysis of the assumptions behind shallow ice approximations, i.e. of the order of magnitude of field variables. We do this by using a numerical solution of the exact equations for ice flow over a sloping, undulating bed. We also construct analytical solutions for the SIA and SOSIA and numerically compute the accuracy for varying $\epsilon$ by comparing to the exact solution. We find that the assumptions underlying shallow ice approximations are not entirely appropriate since they do not account for a high viscosity boundary layer developing near the ice surface as soon as small bumps are introduced at the ice base. This boundary layer is thick and has no distinct border. Other existing theories which do incorporate the boundary layer are in better, but not full, agreement with our numerical results. Our results reveal that neither the SIA nor the SOSIA is as accurate as suggested by the asymptotic expansion approach. 
Also, in SOSIA the ice rheology needs to be altered to avoid infinite viscosity, though both our analytical and numerical solutions show that, especially for high bump amplitudes, the accuracy of the SOSIA is highly sensitive to this alteration. However, by updating the SOSIA solution in an iterative manner, we obtain a model which utilises the advantages of shallow ice approximations, while reducing the disadvantages.} } @TechReport{ it:2012-014, author = {Jens Berg and Jan Nordstr{\"o}m}, title = {A Stable and Dual Consistent Boundary Treatment Using Finite Differences on Summation-By-Parts Form}, institution = it, department = tdb, year = 2012, number = {2012-014}, month = jul, abstract = {This paper is concerned with computing very high order accurate linear functionals from a numerical solution of a time-dependent partial differential equation (PDE). Based on finite differences on summation-by-parts form, together with a weak implementation of the boundary conditions, we show how to construct suitable boundary conditions for the PDE such that the continuous problem is well-posed and the discrete problem is stable and spatially dual consistent. These two features result in a superconvergent functional, in the sense that the order of accuracy of the functional is provably higher than that of the solution.} } @TechReport{ it:2012-013, author = {Philip Axer and Rolf Ernst and Heiko Falk and Alain Girault and Daniel Grund and Nan Guan and Bengt Jonsson and Peter Marwedel and Jan Reineke and Christine Rochange and Maurice Sebastian and von Hanxleden, Reinhard and Reinhard Wilhelm and Wang Yi}, title = {Building Timing Predictable Embedded Systems}, institution = it, department = docs, year = 2012, number = {2012-013}, month = jul, abstract = {A large class of embedded systems is distinguished from general purpose computing systems by the need to satisfy strict requirements on timing, often under constraints on available resources. 
Predictable system design is concerned with the challenge of building systems for which timing requirements can be guaranteed a priori. Perhaps paradoxically, this problem has become more difficult by the introduction of performance-enhancing architectural elements, such as caches, pipelines, and multithreading, which introduce a large degree of nondeterminism and make guarantees harder to provide. The intention of this paper is to summarize current state-of-the-art in research concerning how to build predictable yet performant systems. We suggest precise definitions for the concept of ``predictability'', and present predictability concerns at different abstraction levels in embedded software design. First, we consider timing predictability of processor instruction sets. Thereafter, we consider how programming languages can be equipped with predictable timing semantics, covering both a language-based approach based on the synchronous paradigm, as well as an environment that provides timing semantics for a mainstream programming language (in this case C). We present techniques for achieving timing predictability on multicores. Finally we discuss how to handle predictability at the level of networked embedded systems, where randomly occurring errors must be considered.} } @TechReport{ it:2012-012, author = {Anders Goude and Stefan Engblom}, title = {Adaptive Fast Multipole Methods on the {GPU}}, institution = it, department = tdb, year = 2012, number = {2012-012}, month = may, abstract = {We present a highly general implementation of fast multipole methods on graphical processor units (GPUs). Our two-dimensional double precision code features an asymmetric type of adaptive space discretization leading to a particularly elegant and flexible implementation. All steps of the multipole algorithm are efficiently performed on the GPU, including the initial phase which assembles the topological information of the input data. 
Through careful timing experiments we investigate the effects of the various peculiarities with the GPU architecture.} } @TechReport{ it:2012-011, author = {Martins da Silva, Margarida and Torbj{\"o}rn Wigren and Teresa Mendonca}, title = {An Adaptive Controller for Propofol in Anesthesia under Synergistic Remifentanil Disturbances}, institution = it, department = syscon, year = 2012, number = {2012-011}, month = may, abstract = {An adaptive controller for propofol administration relying on a minimally parameterized SISO Wiener model for the depth of anesthesia is proposed. The exact linearization of this minimal Wiener structure using the continuous-time model parameters estimated online by an extended Kalman filter is a key point in the design. A linear quadratic Gaussian controller is developed for the exactly linearized system. The depth of anesthesia is considered to be measured by the bispectral index. Excellent results were obtained when the robustness of the proposed controller with respect to inter and intrapatient variability was assessed through simulations on a database of 500 patients. The closed-loop system showed good disturbance rejection properties when the synergistic effect of remifentanil in the depth of anesthesia was considered.} } @TechReport{ it:2012-010, author = {Muneeb Khan and Nikos Nikoleris and Erik Hagersten}, title = {Investigating How Simple Software Optimizations Effect Relative Throughput Scaling on Multicores}, institution = it, department = docs, year = 2012, number = {2012-010}, month = may, abstract = {Optimizing applications for off-chip bandwidth usage has become increasingly critical as computing resources in multicore processors have increased much faster than shared resources, off-chip bandwidth and shared cache capacity. 
While improved use of shared resources can benefit single core performance, it is crucial for systems with several active cores, where the way each core uses shared resources can directly impact the performance of sibling cores. Although optimizing for memory bandwidth has been a priority for decades, the tools to effectively profile application memory accesses are relatively new. With such tools we can uncover memory accesses that use shared cache capacity and memory bandwidth inefficiently, and trace them back to the original source code. This paper presents case studies of using memory access profiles to uncover and explain critical memory access issues for three selected workloads. These memory bottlenecks are resolved using commonly applicable software optimization techniques. We then investigate the throughput wall $-$ the relationship between the drop in off-chip traffic, post-optimization, and the resulting throughput gain achieved. Our experiments for multi-execution show that, after optimization, the drop in off-chip traffic is reflected in the maximum throughput that can be achieved by the optimized workloads relative to the original.} } @TechReport{ it:2012-009, author = {Petia Boyanova and Maya Neytcheva}, title = {Efficient Numerical Solution of Discrete Multi-Component {C}ahn-{H}illiard Systems}, institution = it, department = tdb, year = 2012, number = {2012-009}, month = apr, abstract = {In this work we develop preconditioners for the iterative solution of the large scale algebraic systems, arising in finite element discretizations of microstructures with an arbitrary number of components, described by the diffusive interface model. 
The suggested numerical techniques are applied to the study of ternary fluid flow processes.} } @TechReport{ it:2012-008, author = {Kenneth Duru and Gunilla Kreiss}, title = {Numerical Interaction of Boundary Waves with Perfectly Matched Layers in Elastic Waveguides}, institution = it, department = tdb, year = 2012, number = {2012-008}, month = apr, abstract = {Perfectly matched layers (PML) are a novel technique to simulate the absorption of waves in open domains. Wave propagation in isotropic elastic waveguides has the possibility to support propagating modes with negative group velocities and long wavelengths. Propagating modes with negative group velocities can lead to temporally growing solutions in the PML. In this paper, we demonstrate that negative group velocities in isotropic elastic waveguides are not harmful to the discrete PML. Analysis and numerical experiments confirm the accuracy and the stability of the PML.} } @TechReport{ it:2012-007, author = {Kenneth Duru and Gunilla Kreiss}, title = {Boundary Waves and Stability of the Perfectly Matched Layer}, institution = it, department = tdb, year = 2012, number = {2012-007}, month = apr, abstract = {We study the stability of the perfectly matched layer (PML) for symmetric second order hyperbolic partial differential equations on the upper half plane, with boundary conditions at $y = 0$. Using a mode analysis, we develop a technique to analyze the stability of the corresponding initial boundary value problem on the half plane. We apply our technique to the PML for the elastic wave equations subject to free surface and homogenous Dirichlet boundary conditions, and to the PML for the curl--curl Maxwell's equation subject to insulated walls and perfectly conducting walls boundary conditions. 
The conclusion is that these half--plane problems do not support temporally growing modes.} } @TechReport{ it:2012-006, author = {Johanna Bj{\"o}rklund and Eric J{\"o}nsson and Lisa Kaati}, title = {Aspects of Plan Operators in a Tree Automata Framework}, institution = it, year = 2012, number = {2012-006}, month = apr, abstract = {Plan recognition addresses the problem of inferring an agent's goals from its actions. Applications range from anticipating care-takers' needs to predicting volatile situations. In this contribution, we describe a prototype plan recognition system that is based on the well-researched theory of (weighted) finite tree automata. To illustrate the system's capabilities, we use data gathered from matches in the real-time strategy game StarCraft II. Finally, we discuss how more advanced plan operators can be accommodated for in this framework while retaining computational efficiency by taking after the field of formal model checking and over-approximating the target language.} } @TechReport{ it:2012-005, author = {Stefan Engblom}, title = {On the Stability of Stochastic Jump Kinetics}, institution = it, department = tdb, year = 2012, number = {2012-005}, month = feb, abstract = {Motivated by the lack of a suitable framework for analyzing popular stochastic models of Systems Biology, we devise conditions for existence and uniqueness of solutions to certain jump stochastic differential equations (jump SDEs). Working from simple examples we find \emph{reasonable} and \emph{explicit} assumptions on the driving coefficients for the SDE representation to make sense. By `reasonable' we mean that stronger assumptions generally do not hold for systems of practical interest. In particular, we argue against the traditional use of global Lipschitz conditions and certain common growth restrictions. By `explicit', finally, we like to highlight the fact that the various constants occurring among our assumptions \emph{all can be determined once the model is fixed}. 
We show how basic perturbation results can be derived in this setting such that these can readily be compared with the corresponding estimates from deterministic dynamics. The main complication is that the natural path-wise representation is generated by a counting measure with an intensity that depends nonlinearly on the state.} } @TechReport{ it:2012-004, author = {Jens Berg and Jan Nordstr{\"o}m}, title = {Superconvergent Functional Output for Time-Dependent Problems using Finite Differences on Summation-By-Parts Form}, institution = it, department = tdb, year = 2012, number = {2012-004}, month = feb, abstract = {Finite difference operators satisfying the summation-by-parts (SBP) rules can be used to obtain high order accurate, energy stable schemes for time-dependent partial differential equations, when the boundary conditions are imposed weakly by the simultaneous approximation term (SAT). In general, an SBP-SAT discretization is accurate of order p+1 with an internal accuracy of 2p and a boundary accuracy of p. Despite this, it is shown in this paper that any linear functional computed from the time-dependent solution, will be accurate of order 2p when the boundary terms are imposed in a stable and dual consistent way. The method does not involve the solution of the dual equations, and superconvergent functionals are obtained at no extra computational cost. Four representative model problems are analyzed in terms of convergence and errors, and it is shown in a systematic way how to derive schemes which gives superconvergent functional outputs.} } @TechReport{ it:2012-003, author = {David Ekl{\"o}v and Nikos Nikoleris and David Black-Schaffer and Erik Hagersten}, title = {Design and Evaluation of the {B}andwidth {B}andit}, institution = it, department = docs, year = 2012, number = {2012-003}, month = feb, abstract = {Applications that are co-scheduled on a multicore compete for shared resources, such as cache capacity and memory bandwidth. 
The performance degradation resulting from this contention can be substantial, which makes it important to effectively manage these shared resources. This, however, requires an understanding of how applications are impacted by such contention. While the effects of contention for cache capacity have been extensively studied, less is known about the effects of contention for memory bandwidth. This is in large part due to its complex nature, as sensitivity to bandwidth contention depends on bottlenecks at several levels of the memory-system, the interaction and locality properties of the application's access stream. This paper explores the contention effects of increased latency and decreased memory parallelism at different points in the memory hierarchy, both of which cause decreases in available bandwidth. To understand the impact of such contention on applications, it also presents a method whereby an application's overall sensitivity to different degrees of bandwidth contention can be directly measured. This method is used to demonstrate the varying contention sensitivity across a selection of benchmarks, and explains why some of them experience substantial slowdowns long before the overall memory bandwidth saturates. } } @TechReport{ it:2012-002, author = {O. Axelsson and P. Boyanova and M. Kronbichler and M. Neytcheva and X. Wu}, title = {Numerical and Computational Efficiency of Solvers for Two-Phase Problems}, institution = it, department = tdb, year = 2012, number = {2012-002}, month = jan, abstract = {We consider two-phase flow problems, modelled by the Cahn-Hilliard equation. In our work, the nonlinear fourth-order equation is decomposed into a system of two second-order equations for the concentration and the chemical potential. We analyse solution methods based on an approximate two-by-two block factorization of the Jacobian of the nonlinear discrete problem. 
We propose a preconditioning technique that reduces the problem of solving the non-symmetric discrete Cahn-Hilliard system to the problem of solving systems with symmetric positive definite matrices where off-the-shelf multilevel and multigrid algorithms are directly applicable. The resulting solution methods exhibit optimal convergence and computational complexity properties and are suitable for parallel implementation. We illustrate the efficiency of the proposed methods by various numerical experiments, including parallel results for large scale three dimensional problems.} } @TechReport{ it:2012-001, author = {Magnus Gustafsson and James Demmel and Sverker Holmgren}, title = {Numerical Evaluation of the Communication-Avoiding {L}anczos Algorithm}, institution = it, department = tdb, year = 2012, number = {2012-001}, month = jan, abstract = {The Lanczos algorithm is widely used for solving large sparse symmetric eigenvalue problems when only a few eigenvalues from the spectrum are needed. Due to sparse matrix-vector multiplications and frequent synchronization, the algorithm is communication intensive leading to poor performance on parallel computers and modern cache-based processors. The Communication-Avoiding Lanczos algorithm [Hoemmen; 2010] attempts to improve performance by taking the equivalence of $s$ steps of the original algorithm at a time. The scheme is equivalent to the original algorithm in exact arithmetic but as the value of $s$ grows larger, numerical roundoff errors are expected to have a greater impact. In this paper, we investigate the numerical properties of the Communication-Avoiding Lanczos (CA-Lanczos) algorithm and how well it works in practical computations. Apart from the algorithm itself, we have implemented techniques that are commonly used with the Lanczos algorithm to improve its numerical performance, such as semi-orthogonal schemes and restarting. 
We present results that show that CA-Lanczos is often as accurate as the original algorithm. In many cases, if the parameters of the $s$-step basis are chosen appropriately, the numerical behaviour of CA-Lanczos is close to the standard algorithm even though it is somewhat more sensitive to loosing mutual orthogonality among the basis vectors.} } @TechReport{ it:2011-032, author = {Bj{\"o}rn Halvarsson and Torsten Wik and Bengt Carlsson}, title = {New Input-Output Pairing Strategies Based on Linear Quadratic {G}aussian Control}, institution = it, department = syscon, year = 2011, number = {2011-032}, month = dec, abstract = {Two input-output pairing strategies based on linear quadratic Gaussian (LQG) control are suggested. In the first strategy, denoted linear quadratic interaction index (LQII), input-output pairing suggestions are found from a minimization of the output signal variance. This index not only guides to what pairing should be tried, it also gives a direct measure of how much better a full MIMO controller can perform. The second proposed interaction measure, denoted integrating linear quadratic index array (ILQIA), focuses more on the low frequency behaviour of the considered plant, such as load disturbances. The strategies are used to compare the expected performance of decentralized control structures in some illustrative examples. The pairing suggestions are compared with the recommendations previously obtained using other interaction measures such as the relative gain array (RGA), the Hankel interaction index array (HIIA) and the participation matrix (PM). 
The new strategies are easy to interpret and give suitable pairing recommendations where other methods may fail.} } @TechReport{ it:2011-031, author = {H{\aa}kan Selg}, title = {Om nya kontaktm{\"o}nster i arbetslivet: {F}acebook, {L}inked{I}n, {T}witter}, institution = it, department = mdi, year = 2011, number = {2011-031}, month = dec, note = {In Swedish.}, abstract = {Under senare {\aa}r har telefoni och e-post kompletterats med nya digitala kontaktverktyg; SMS, och direktmeddelanden ("chatt"), sociala medier som Facebook och LinkedIn och mikrobloggen Twitter. F{\"o}r r{\"o}stsamtal finns numera ocks{\aa} IP-telefoni. I en pilotstudie fr{\aa}n 2010 framkom att nya kontaktm{\"o}nster h{\aa}ller p{\aa} att etableras i sp{\aa}ren av de nya verktygen. I syfte att kartl{\"a}gga anv{\"a}ndningen av de olika verktygen genomf{\"o}rdes under mars 2011 en enk{\"a}tunders{\"o}kning bland Dataf{\"o}reningens medlemmar. Resultaten av digitala kontaktverktyg redovisas i tv{\aa} rapporter; en redan publicerad som behandlar kontaktverktyg f{\"o}r en eller ett f{\aa}tal mottagare och den f{\"o}religgande som tar upp verktyg som riktar sig till ett st{\"o}rre antal mottagare. Sociala medier har n{\aa}tt stort genomslag i den unders{\"o}kta gruppen d{\"a}r de allra flesta har skaffat konton p{\aa} b{\aa}de Facebook och LinkedIn. M{\"o}nstren i anv{\"a}ndningen skiljer sig d{\"a}remot {\aa}t. F{\"o}r en majoritet anv{\"a}ndarna {\"a}r Facebook ett verktyg f{\"o}r kontakter och utbyte av information som {\"a}r relaterat till privatsf{\"a}ren."V{\"a}nkretsen" p{\aa} Facebook domineras av privata kontakter men med inslag av personer med koppling till yrkeslivet. Kvinnor anv{\"a}nder Facebook mer {\"a}n vad m{\"a}n g{\"o}r. En h{\"o}gre andel kvinnor har konto p{\aa} Facebook, har fler v{\"a}nner och l{\"a}ser oftare av sitt konto och g{\"o}r egna bidrag och statusuppdateringar. P{\aa} LinkedIn dominerar ist{\"a}llet professionella "connections". 
Yrkesrelaterade inl{\"a}gg och statusuppdateringar {\"a}r i klar majoritet. F{\"o}r de allra flesta handlar det om ett passivt engagemang. {\AA}tskilliga dagar f{\"o}rflyter mellan bes{\"o}ken och {\"a}nnu fler mellan statusuppdateringarna. En mindre grupp utnyttjar dock LinkedIn som plattform f{\"o}r diskussioner i professionella angel{\"a}genheter. P{\aa} LinkedIn {\"a}r m{\"a}nnen mera aktiva {\"a}n kvinnorna vad g{\"a}ller s{\aa}v{\"a}l bes{\"o}k som statusuppdateringar. Twitter visar sig inte ha mycket gemensamt med Facebook och LinkedIn. Endast var fj{\"a}rde av de tillfr{\aa}gade har skaffat sig konto. Det finns en k{\"a}rna av engagerade twittrare {\"a}ven om majoriteten {\"a}r mer eller mindre passiva. M{\aa}nga av dem som skaffat konto verkar os{\"a}kra om vad det ska anv{\"a}ndas till. Id{\'e}n om sociala medier som plattformar och fora f{\"o}r aktivt informationsutbyte och diskussion f{\aa}r bara delvis st{\"o}d i materialet. Det dominerande intrycket {\"a}r att ett f{\"o}rh{\aa}llandevis begr{\"a}nsat antal anv{\"a}ndare svarar f{\"o}r merparten av inl{\"a}ggen.} } @TechReport{ it:2011-030, author = {P. Boyanova and I. Georgiev and S. Margenov and L. Zikatanov}, title = {Multilevel Preconditioning of Graph-{L}aplacians: Polynomial Approximation of the Pivot Blocks Inverses}, institution = it, department = tdb, year = 2011, number = {2011-030}, month = nov, abstract = {We consider the discrete system resulting from mixed finite element approximation of a second-order elliptic boundary value problem with Crouzeix-Raviart non-conforming elements for the vector valued unknown function and piece-wise constants for the scalar valued unknown function. Since the mass matrix corresponding to the vector valued variables is diagonal, these unknowns can be eliminated exactly. 
Thus, the problem of designing an efficient algorithm for the solution of the resulting algebraic system is reduced to one of constructing an efficient algorithm for a system whose matrix is a graph-Laplacian (or weighted graph-Laplacian). We propose a preconditioner based on an algebraic multilevel iterations (AMLI) algorithm. The hierarchical two-level transformations and the corresponding $2\times 2$ block splittings of the graph-Laplacian needed in an AMLI algorithm are introduced locally on macroelements. Each macroelement is associated with an edge of a coarser triangulation. To define the action of the preconditioner we employ polynomial approximations of the inverses of the pivot blocks in the $2\times 2$ splittings. Such approximations are obtained via the best polynomial approximation of $x^{-1}$ in $L_{\infty}$ norm on a finite interval. Our construction provides sufficient accuracy and moreover, guarantees that each pivot block is approximated by a positive definite matrix polynomial. One possible application of the constructed efficient preconditioner is in the numerical solution of unsteady Navier-Stokes equations by a projection method. It can also be used to design efficient solvers for problems corresponding to other mixed finite element discretizations.} } @TechReport{ it:2011-029, author = {Alberto Ros and Stefanos Kaxiras}, title = {{VIPS}: Simple Directory-Less Broadcast-Less Cache Coherence Protocol}, institution = it, department = docs, year = 2011, number = {2011-029}, month = nov, abstract = {Coherence in multicores introduces complexity and overhead (directory, state bits) in exchange for local caching, while being ``invisible'' to the memory consistency model. In this paper we show that a much simpler (directory-less/broadcast-less) multicore coherence provides almost the same performance without the complexity and overhead of a directory protocol. 
Motivated by recent efforts to simplify coherence for disciplined parallelism, we propose a hardware approach that does not require any application guidance. The cornerstone of our approach is a run-time, application-transparent, division of data into private and shared at a page-level granularity. This allows us to implement a dynamic write-policy (write-back for private, write-through for shared), simplifying the protocol to just two stable states. Self-invalidation of the shared data at synchronization points allows us to remove the directory (and invalidations) completely, with just a data-race-free guarantee (at the write-through granularity) from software. Allowing multiple simultaneous writers and merging their writes, relaxes the DRF guarantee to a word granularity and optimizes traffic. This leads to our main result: a virtually costless coherence that uses the same simple protocol for both shared, DRF data and private data (differentiating only in the timing of when to put data back in the last-level cache) while at the same time approaching the performance (within 3\%) of a complex directory protocol. } } @TechReport{ it:2011-028, author = {Gunnika Isaksson-Luttemann and Bengt Sandblad and Arne W. Andersson and Simon Tchirner}, title = {Reducing Unnecessary Cognitive Load in Traffic Control}, institution = it, department = hci, year = 2011, number = {2011-028}, month = nov, abstract = {Uppsala University has collaborated with Swedish National Railway Administration in research about train traffic control and how to improve traffic controllers' work environment, so that they can better meet future demands. This has resulted in a new operational train traffic control system called STEG. The traffic controllers are today forced to develop and use very complex mental models which take a long time to learn. 
We have also found that their cognitive capacity is more used to identify, understand and analyze the traffic situation and less to solve problems and find optimal solutions to disturbances. The objective for developing STEG was to change this situation and reduce unnecessary cognitive load. Interviews with traffic controllers show that STEG has reduced the complexity of their mental models and contributed to less unnecessary cognitive load in operation. Our conclusion is that by reducing the complexity of their mental model, they can be skilled much faster and they are now able to use their cognitive capacity and skills on the important parts of their work. } } @TechReport{ it:2011-027, author = {Gunnika Isaksson-Lutteman and Arvid Kauppi and Arne W Andersson and Bengt Sandblad and Mikael Erlandsson}, title = {Operative Tests of a New System for Train Traffic Control}, institution = it, department = hci, year = 2011, number = {2011-027}, month = nov, abstract = {Tomorrow's train traffic systems require new strategies and solutions for efficient train traffic control and utilization of track capacity, especially in traffic systems with a high degree of deregulated and mixed traffic. There are many different goals associated with the traffic control tasks and the work of the traffic controllers (dispatchers). Examples are safety, efficiency of the traffic with regard to timeliness and energy consumption, good service and information to passengers and customers etc. Today's traffic control systems and user interfaces do not efficiently support such goals. In earlier research we have analyzed important aspects of the traffic controller's tasks, strategies, decision making, use of information and support systems etc. 
Based on this research we, together with Banverket (Swedish Rail Administration), have designed prototype systems and interfaces that better can meet future goals and contribute to more optimal use of infrastructure capacity.} } @TechReport{ it:2011-026, author = {Martin Kronbichler and Gunilla Kreiss}, title = {A Hybrid Level-Set-Phase-Field Method for Two-Phase Flow with Contact Lines}, institution = it, department = tdb, year = 2011, number = {2011-026}, month = dec, abstract = {We present a hybrid method combining a level set model with a phase field model. Contact line dynamics are represented by the full phase field model, whereas the convective interface transport and the evaluation of surface tension in the bulk of the domain are realized by a conservative level set model. This combination avoids the singularity at the contact line that is present in classical level set models with no-slip boundary conditions. The hybrid method relies on the similar shape of the concentration variable in the phase field model and the level set function. The terms specific to the phase field model are disabled away from the contact region by using a switch function. The benefits of using the level set model instead of the phase field model in the bulk of the domain are quantified by two extensive numerical benchmark computations. The conservative level set method gives considerably higher accuracy at the same mesh size than the phase field method. For the phase field method, a range of interface thicknesses and the mobilities are considered. The best values of these parameters for the two test cases are used in the comparison. Further, the well-posedness and stability of the hybrid method are shown using an a priori energy estimate. 
Numerical simulation of channel flow with the hybrid method demonstrates the increase in computational efficiency compared to a plain phase field model.} } @TechReport{ it:2011-025, author = {Martin Kronbichler and Katharina Kormann}, title = {A Generic Interface for Parallel Cell-Based Finite Element Operator Application}, institution = it, department = tdb, year = 2011, number = {2011-025}, month = nov, abstract = {We present a memory-efficient and parallel framework for finite element operator application implemented in the generic open-source library deal.II. Instead of assembling a sparse matrix and using it for matrix-vector products, the operation is applied by cell-wise quadrature. The evaluation of shape functions is implemented with a sum-factorization approach. Our implementation is parallelized on three levels to exploit modern supercomputer architecture in an optimal way: MPI over remote nodes, thread parallelization with dynamic task scheduling within the nodes, and explicit vectorization for utilizing processors' vector units. Special data structures are designed for high performance and to keep the memory requirements to a minimum. The framework handles adaptively refined meshes and systems of partial differential equations. We provide performance tests for both linear and nonlinear PDEs which show that our cell-based implementation is faster than sparse matrix-vector products for polynomial order two and higher on hexahedral elements and yields ten times higher Gflops rates.} } @TechReport{ it:2011-024, author = {Martin Kronbichler and Claudio Walker and Gunilla Kreiss and Bernhard M{\"u}ller }, title = {Multiscale Modeling of Capillary-Driven Contact Line Dynamics}, institution = it, department = tdb, year = 2011, number = {2011-024}, month = nov, abstract = {We present a multiscale method to simulate the flow of two immiscible incompressible fluids in contact with solids. 
The macro model in our method is based on a level set method with sharp interface treatment. The contact line is tracked explicitly and moves according to a slip velocity that depends on the apparent contact angle of the interface with the solid. The relation between apparent contact angle and slip velocity is determined in a micro model based on the phase field method. The phase field method seeks for an equilibrium slip velocity in a box around the contact point, given a static contact angle at the solid and the apparent contact angle in the far field. The dimensions of the box are chosen in the range of physical diffusion length scales at the contact point. We present numerical results of the multiscale method for capillary-driven flows which demonstrate the convergence of results in the macro model and compare the behavior with other approaches in contact line dynamics. } } @TechReport{ it:2011-023, author = {Hossein Bani-Hashemian and Stefan Hellander and Per L{\"o}tstedt}, title = {Efficient Sampling in Event-Driven Algorithms for Reaction-Diffusion Processes}, institution = it, department = tdb, year = 2011, number = {2011-023}, month = oct, abstract = {In event-driven algorithms for simulation of diffusing, colliding, and reacting particles, new positions and events are sampled from the cumulative distribution function (CDF) of a probability distribution. The distribution is sampled frequently and it is important for the efficiency of the algorithm that the sampling is fast. The CDF is known analytically or computed numerically. Analytical formulas are sometimes rather complicated making them difficult to evaluate. The CDF may be stored in a table for interpolation or computed directly when it is needed. Different alternatives are compared for chemically reacting molecules moving by Brownian diffusion in two and three dimensions. 
The best strategy depends on the dimension of the problem, the length of the time interval, the density of the particles, and the number of different reactions.} } @TechReport{ it:2011-022, author = {Magnus Gustafsson and Anna Nissen and Katharina Kormann}, title = {Stable Difference Methods for Block-Structured Adaptive Grids}, institution = it, department = tdb, year = 2011, number = {2011-022}, month = oct, abstract = {The time-dependent Schr{\"o}dinger equation describes quantum dynamical phenomena. Solving it numerically, the small-scale interactions that are modeled require very fine spatial resolution. At the same time, the solutions are localized and confined to small regions in space. Using the required resolution over the entire high-dimensional domain often makes the model problems intractable due to the prohibitively large grids that result from such a discretization. In this paper, we present a block-structured adaptive mesh refinement scheme, aiming at efficient adaptive discretization of high-dimensional partial differential equations such as the time-dependent Schr{\"o}dinger equation. Our framework allows for anisotropic grid refinement in order to avoid unnecessary refinement. For spatial discretization, we use standard finite difference stencils together with summation-by-parts operators and simultaneous-approximation-term interface treatment. We propagate in time using exponential integration with the Lanczos method. Our theoretical and numerical results show that our adaptive scheme is stable for long time integrations. 
We also show that the discretizations meet the expected convergence rates.} } @TechReport{ it:2011-021, author = {H{\aa}kan Selg}, title = {Om nya kontaktm{\"o}nster i arbetslivet: {SMS}, chatt, e-post och telefoni - N{\"a}r anv{\"a}nds vad?}, institution = it, department = hci, year = 2011, number = {2011-021}, month = oct, note = {In Swedish.}, abstract = {Under senare {\aa}r har telefoni och e-post kompletterats med nya digitala kontaktverktyg; SMS, och direktmeddelanden ("chatt"), sociala medier som Facebook och LinkedIn och mikrobloggen Twitter. F{\"o}r r{\"o}stsamtal finns numera ocks{\aa} IP-telefoni. I en pilotstudie fr{\aa}n 2010 framkom att nya kontaktm{\"o}nster h{\aa}ller p{\aa} att etableras i sp{\aa}ren av de nya verktygen. I syfte att kartl{\"a}gga anv{\"a}ndningen av de olika verktygen genomf{\"o}rdes under mars 2011 en enk{\"a}tunders{\"o}kning bland Dataf{\"o}reningens medlemmar. Den h{\"a}r rapporten behandlar kontaktverktygen f{\"o}r en eller ett f{\aa}tal mottagare och d{\"a}r det huvudsakliga syftet med kontakten {\"a}r dialog. I en kommande rapport behandlas sociala medier, dvs. kommunikation med ett st{\"o}rre antal utvalda mottagare. Resultaten pekar mot ett relativt specialiserat kontaktm{\"o}nster i yrkesrelaterade sammanhang d{\"a}r e-post och mobiltelefoni utg{\"o}r standardverktygen f{\"o}r text- respektive r{\"o}stmeddelanden. De {\"o}vriga verktygen uppvisar av st{\"o}rre variation i anv{\"a}ndarm{\"o}nstren. Anv{\"a}ndningen av direktmeddelanden {\"a}r p{\aa} v{\"a}g upp{\aa}t och har redan passerat fast telefoni f{\"o}r yrkesrelaterade kontakter. H{\"a}r dominerar de yngre anv{\"a}ndarna. Den privata anv{\"a}ndningen k{\"a}nnetecknas dels av en bredare repertoar i valet av kontaktverktyg, dels tydligare tendenser i anv{\"a}ndarm{\"o}nstren. SMS och direktmeddelanden utnyttjas mer av kvinnor {\"a}n av m{\"a}n. Vidare ger {\aa}ldersfaktorn ett starkare utslag vid privat anv{\"a}ndning. 
Slutsatsen blir d{\"a}rf{\"o}r att anv{\"a}ndarm{\"o}nstren vid privata kontakter utm{\"a}rks av st{\"o}rre heterogenitet. Anv{\"a}ndningen av fast telefoni minskar successivt. Tendensen {\"a}r generell men processen sker snabbare i arbetslivet. I likhet med chatten m{\"a}rks en {\aa}ldersrelaterad tendens men med omv{\"a}nda f{\"o}rtecken. Yngre anv{\"a}ndare {\"o}verger i st{\"o}rre utstr{\"a}ckning den fasta telefonin. De tillfr{\aa}gade ombads rangordna de olika kontaktverktygens l{\"a}mplighet vid ett antal givna situationer. Det visade sig d{\aa} att telefonsamtalet, oavsett det sker via fast, mobil eller IP-telefoni, rankas h{\"o}gt i alla situationer. Lika popul{\"a}r i yrkessammanhang {\"a}r e-posten som dessutom har den f{\"o}rdelen att kontakten dokumenteras. Detta {\"a}r framf{\"o}r allt uppskattat n{\"a}r syftet {\"a}r att tr{\"a}ffa en {\"o}verenskommelse. S{\aa}v{\"a}l SMS som direktmeddelanden bed{\"o}ms l{\"a}mpliga vid informella situationer och i kontakter med personer man k{\"a}nner v{\"a}l. Vid br{\aa}dskande {\"a}renden rankas SMS som n{\"a}st b{\"a}sta alternativet efter telefonsamtal. Valet av kontaktverktyg p{\aa}verkas i liten grad av om kontakten sker i yrket eller i privata sammanhang. Mer styrande {\"a}r andra omst{\"a}ndigheter , t.ex. om kontakten {\"a}r av formell eller informell natur eller om {\"a}rendet {\"a}r br{\aa}dskande. St{\"o}rst roll spelar hur pass v{\"a}l man k{\"a}nner personen som ska kontaktas, inbegripet vilka kontaktverktyg hon eller han k{\"a}nner sig hemmastadd med.} } @TechReport{ it:2011-020, author = {Natasha Flyer and Erik Lehto and S{\'e}bastien Blaise and Grady B. 
Wright and Amik St-Cyr}, title = {{RBF}-Generated Finite Differences for Nonlinear Transport on a Sphere: Shallow Water Simulations}, institution = it, department = tdb, year = 2011, number = {2011-020}, month = sep, abstract = {The current paper establishes the computational efficiency and accuracy of the RBF-FD method for large-scale geoscience modeling with comparisons to state-of-the-art methods as high-order discontinuous Galerkin and spherical harmonics, the latter using expansions with close to 300,000 bases. The test cases are demanding fluid flow problems on the sphere that exhibit numerical challenges, such as Gibbs phenomena, sharp gradients, and complex vortical dynamics with rapid energy transfer from large to small scales over short time periods. The computations were possible as well as very competitive due to the implementation of hyperviscosity on large RBF stencil sizes (corresponding roughly to 6th to 9th order methods) with up to O($10^5$) nodes on the sphere. The RBF-FD method scaled as O($N$) per time step, where $N$ is the total number of nodes on the sphere.} } @TechReport{ it:2011-019, author = {Qaisar Abbas and Jan Nordstr{\"o}m}, title = {A Weak Boundary Procedure for High Order Finite Difference Approximations of Hyperbolic Problems}, institution = it, department = tdb, year = 2011, number = {2011-019}, month = sep, abstract = {We introduce a new weak boundary procedure for high order finite difference operators of summation-by-parts type applied to hyperbolic problems. The boundary procedure is applied in an extended domain where data is known. We show how to raise the order of accuracy for a diagonal norm based approximation and how to modify the spectrum of the resulting operator to get a faster convergence to steady-state. Furthermore, we also show how to construct better non-reflecting properties at the boundaries using the above procedure. 
Numerical results that corroborate the analysis are presented.} } @TechReport{ it:2011-018, author = {Magnus Johansson and Bj{\"o}rn Victor and Joachim Parrow}, title = {Computing Strong and Weak Bisimulations for Psi-Calculi -- with proofs}, institution = it, department = csd, year = 2011, number = {2011-018}, month = aug, abstract = {We present a symbolic transition system and strong and weak bisimulation equivalences for psi-calculi, and show that they are fully abstract with respect to bisimulation congruences in the non-symbolic semantics. A procedure which computes the most general constraint under which two agents are bisimilar is developed and proved correct. A psi-calculus is an extension of the pi-calculus with nominal data types for data structures and for logical assertions representing facts about data. These can be transmitted between processes and their names can be statically scoped using the standard pi-calculus mechanism to allow for scope migrations. Psi-calculi can be more general than other proposed extensions of the pi-calculus such as the applied pi-calculus, the spi-calculus, the fusion calculus, or the concurrent constraint pi-calculus. Symbolic semantics are necessary for an efficient implementation of the calculus in automated tools exploring state spaces, and the full abstraction property means the symbolic semantics makes exactly the same distinctions as the original. 
} } @TechReport{ it:2011-017, author = {Anna Nissen and Gunilla Kreiss and Margot Gerritsen}, title = {Stability at Nonconforming Grid Interfaces for a High Order Discretization of the {S}chr{\"o}dinger Equation}, institution = it, department = tdb, year = 2011, number = {2011-017}, month = aug, note = {Typos corrected October 7, 2011.}, abstract = {In this paper we extend the results from our earlier work on stable boundary closures for the Schr{\"o}dinger equation using the summation-by-parts-simultaneous approximation term (SBP--SAT) method to include stability and accuracy at non-conforming grid interfaces. Stability at the grid interface is shown by the energy method, and the estimates are generalized to multiple dimensions. The accuracy of the grid interface coupling is investigated using normal mode analysis for operators of 2nd and 4th order formal interior accuracy. We show that full accuracy is retained for the 2nd and 4th order operators. The accuracy results are extended to 6th and 8th order operators by numerical simulations, in which case two orders of accuracy is gained with respect to the lower order approximation close to the interface.} } @TechReport{ it:2011-016, author = {Elena Sundkvist and Elisabeth Larsson}, title = {Implementation of a Collocated Boundary Element Method for Acoustic Wave Propagation in Multilayered Fluid Media}, institution = it, year = 2011, number = {2011-016}, month = jun, abstract = {The numerical solution of the Helmholtz equation with nonlocal radiation boundary conditions is considered. We model sound wave propagation in a multilayered piecewise homogeneous medium. A fourth-order accurate collocated, boundary element method is used, where the solution inside the domain is computed through a representation integral. The method is shown to have the expected fourth-order convergence and is also compared with a fourth-order accurate finite difference method. 
} } @TechReport{ it:2011-015, author = {Elena Sundkvist and Kurt Otto}, title = {Discretization of a Hybrid Model for Acoustic Wave Propagation in Layered Fluid Media}, institution = it, department = tdb, year = 2011, number = {2011-015}, month = jun, abstract = {The objective is to construct and discretize (to fourth-order accuracy) a hybrid model for waveguide problems, where a finite difference method for inhomogeneous layers is coupled to a boundary element method (BEM) for a set of homogeneous layers. Such a hybrid model is adequate for underwater acoustics in complicated environments. Waveguides with either plane or axial symmetry are treated, which leads to (algebraically) two-dimensional problems. The main focus is on the collocated BEM.}, note = {Updated 2011-08-08.} } @TechReport{ it:2011-014, author = {Anna Nissen and Gunilla Kreiss and Margot Gerritsen}, title = {High Order Stable Finite Difference Methods for the {S}chr{\"o}dinger Equation}, institution = it, department = tdb, year = 2011, number = {2011-014}, month = may, abstract = {In this paper we extend the Summation-by-parts-simultaneous approximation term (SBP-SAT) technique to the Schr{\"o}dinger equation. Stability estimates are derived and the accuracy of numerical approximations of interior order 2m, m=1, 2, 3, are analyzed in the case of Dirichlet boundary conditions. We show that a boundary closure of the numerical approximations of order m leads to global accuracy of order m+2. The results are supported by numerical simulations.} } @TechReport{ it:2011-013, author = {Hans Norlander}, title = {On the Impact of Model Accuracy for Active Damping of a Viscoelastic Beam}, institution = it, department = syscon, year = 2011, number = {2011-013}, month = may, abstract = {How much do model errors influence the closed loop performance? This question is investigated for the control problem of designing active damping of vibrations in a viscoelastic cantilever beam. 
The beam is physically modeled using Fourier transformed Euler-Bernoulli beam equations. For control design the standard LQG technique is employed, and two different finite order models are used. One is based on truncated modal analysis of the system, and the other model is numerically fitted with respect to the frequency response of the physically theoretical model. The so obtained controllers are evaluated on the physical model with respect to disturbance attenuation and robustness for stability. It is found that all controllers stabilize the system and attenuate the vibrations, but the controllers based on the numerically fitted model perform notably better than those based on truncated modal analysis.} } @TechReport{ it:2011-012, author = {Jens Lindstr{\"o}m and Jan Nordstr{\"o}m}, title = {Stable {R}obin Boundary Conditions for the {N}avier-{S}tokes Equations}, institution = it, department = tdb, year = 2011, number = {2011-012}, month = apr, abstract = {In this paper we prove stability of Robin solid wall boundary conditions for the compressible Navier-Stokes equations. Applications include the no-slip boundary conditions with prescribed temperature or temperature gradient and the first order slip-flow boundary conditions. The formulation is uniform and the transitions between different boundary conditions are done by a change of parameters. We give different sharp energy estimates depending on the choice of parameters. The discretization is done using finite differences on Summation-By-Parts form with weak boundary conditions using the Simultaneous Approximation Term. 
We verify convergence by the method of manufactured solutions and show computations of flows ranging from no-slip to substantial slip.} } @TechReport{ it:2011-011, author = {Petia Boyanova and Minh Do-Quang and Maya Neytcheva}, title = {Efficient Preconditioners for Large Scale Binary {C}ahn-{H}illiard Models}, institution = it, department = tdb, year = 2011, number = {2011-011}, month = apr, abstract = {In this work we consider preconditioned iterative solution methods for numerical simulations of multiphase flow problems, modelled by the Cahn-Hilliard equation. We focus on diphasic flows and the construction and efficiency of a preconditioner for the algebraic systems arising from finite element discretizations in space and the $\theta$-method in time. The preconditioner utilizes to a full extent the algebraic structure of the underlying matrices and exhibits optimal convergence and computational complexity properties. Large scale numerical experiments are included as well as performance comparisons with other solution methods. } } @TechReport{ it:2011-010, author = {Linda {\AA}mand}, title = {Control of Aeration Systems in Activated Sludge Plants - a review}, institution = it, department = syscon, year = 2011, number = {2011-010}, month = apr, abstract = {This review attempts to summarise and categorise research performed within the field of control of continuous aeration systems in municipal wastewater treatment plants over the last ten years. The review covers research into various methods to both decide and track the dissolved oxygen set-point but also the control of the total aerated volume. With respect to dissolved oxygen set-point control and determination, the strategies used for control span from modifications and developments of simple control methods that have been explored since the 1970's, to advanced control such as model-based predictive and feedback controllers. 
Also, fuzzy logic control has gained more interest in the control of continuous processes and is utilised both in the context of deciding and tracking set-points, but also to control the total aerobic volume. The review is supplemented by a discussion on what level of complexity is required for an aeration control system together with a summary of comparisons between control strategies evaluated in full-scale, pilot scale and in simulations.} } @TechReport{ it:2011-009, author = {Owe Axelsson and Maya Neytcheva}, title = {Operator Splittings for Solving Nonlinear, Coupled Multiphysics Problems with an Application to the Numerical Solution of an Interface Problem}, institution = it, department = tdb, year = 2011, number = {2011-009}, month = apr, abstract = {This work summarizes operator splitting methods to solve various kinds of coupled multiphysics problems. Such coupled problems are usually stiff. Furthermore, one is often interested in obtaining stationary solutions, which require integration over long time intervals. Therefore, an implicit and stable time-stepping method of at least second order of accuracy must be used, to allow for larger timesteps. To control the operator splitting errors for nonlinear problems, an approximate Newton solution method is proposed for each separate equation. After completion of some (normally few) Newton steps, the equations are updated with the current solution, thereby preparing for the next sequence of Newton steps. An application for a nonlinear model of interface tracking problem arising in a multiphase flow is described. Hereby an inner--outer iterative solution method with a proper preconditioning for solving the arising linearized algebraic equations, which results in few iterations, is analyzed. 
There is no need to update the preconditioner during the iterations.} } @TechReport{ it:2011-008, author = {Kenneth Duru and Ken Mattsson and Gunilla Kreiss}, title = {Stable and Conservative Time Propagators for Second Order Hyperbolic Systems}, institution = it, department = tdb, year = 2011, number = {2011-008}, month = apr, abstract = {In this paper we construct a hierarchy of arbitrary high (even) order accurate explicit time propagators for semi-discrete second order hyperbolic systems. An accurate semi-discrete problem is obtained by approximating the corresponding spatial derivatives using high order accurate finite difference operators satisfying the summation by parts rule. In order to obtain a strictly stable semi-discrete problem, boundary conditions are imposed weakly using the simultaneous approximation term method. The time discretization starts with a second order central difference scheme, then using the modified equation approach (even in the presence of a first order derivative in time) we derive arbitrary high order accurate time marching schemes. For the fully discrete problem, we introduce a suitable weighted inner product and use the energy method to derive an optimal CFL condition, which provides a useful and rigorous criterion for stability. Numerical examples are also provided. } } @TechReport{ it:2011-007, author = {Vasileios Spiliopoulos and Stefanos Kaxiras and Georgios Keramidas}, title = {A Framework for Continuously Adaptive {DVFS}}, institution = it, department = docs, year = 2011, number = {2011-007}, month = mar, abstract = {We present Continuously Adaptive Dynamic Voltage-Frequency Scaling in Linux systems running on Intel i7 and AMD Phenom II processors. By exploiting slack, inherent in memory-bound programs, our approach aims to improve power efficiency even when the processor does not sit idle. 
Our underlying methodology is based on a simple first-order processor performance model in which frequency scaling is expressed as a change (in cycles) of the main memory latency. Utilizing available performance monitoring hardware, we show that our model is powerful enough to i) predict with reasonable accuracy the effect of frequency scaling (in terms of performance loss), and ii) predict the energy consumed by the core under different V/f combinations. To validate our approach we perform high-accuracy, fine-grain, power measurements directly on the off-chip voltage regulators. We use our model to implement various DVFS policies as Linux ``green'' governors to continuously optimize for various power-efficiency metrics such as EDP (Energy-Delay Product) or ED2P (Energy-Delay-Square Product), or achieve energy savings with a user-specified limit on performance loss. Our evaluation shows that, for SPEC2006 workloads, our governors achieve dynamically the same optimal EDP or ED2P (within 2\% on average) as an exhaustive search of all possible frequencies and supply voltages. Energy savings can reach up to 56\% in memory-bound workloads with corresponding improvements of about 55\% for EDP or ED2P. } } @TechReport{ it:2011-006, author = {Xin He and Maya Neytcheva}, title = {Preconditioning the Incompressible {N}avier-{S}tokes Equations with Variable Viscosity}, institution = it, department = tdb, year = 2011, number = {2011-006}, month = mar, note = {Updated April 5, 2011.}, abstract = {This paper deals with preconditioners for the iterative solution of the discrete Oseen's problem with variable viscosity. The motivation of this work originates from numerical simulations of multiphase flow, governed by the coupled Cahn-Hilliard and incompressible Navier-Stokes equations. The impact of variable viscosity on some known preconditioning technique is analyzed. 
Numerical experiments show that the preconditioning technique for the Oseen's problem with constant viscosity is also efficient when the viscosity is varying.} } @TechReport{ it:2011-005, author = {Andreas Hellander and Stefan Hellander and Per L{\"o}tstedt}, title = {Coupled Mesoscopic and Microscopic Simulation of Stochastic Reaction-Diffusion Processes in Mixed Dimensions}, institution = it, department = tdb, year = 2011, number = {2011-005}, abstract = {We present a new simulation algorithm that allows for dynamic switching between a mesoscopic and a microscopic modeling framework for stochastic reaction-diffusion kinetics. The more expensive and more accurate microscopic model is used only for those species and in those regions in space where there is reason to believe that a microscopic model is needed to capture the dynamics correctly. The microscopic algorithm is extended to simulation on curved surfaces in order to model reaction and diffusion on membranes. The accuracy of the method on and near a spherical membrane is analyzed and evaluated in a numerical experiment. Two biologically motivated examples are simulated in which the need for microscopic simulation of parts of the system arises for different reasons. First, we apply the method to a model of the phosphorylation reactions in a MAPK signaling cascade where microscale methods are necessary to resolve fast rebinding events. Then a model is considered for transport of a species over a membrane coupled to reactions in the bulk. 
The new algorithm attains an accuracy similar to a full microscopic simulation by handling critical interactions on the microscale, but at a significantly reduced cost by using the mesoscale framework for most parts of the biological model.} } @TechReport{ it:2011-004, author = {Petia Boyanova and Minh Do-Quang and Maya Neytcheva}, title = {Solution Methods for the {C}ahn-{H}illiard Equation Discretized by Conforming and Non-Conforming Finite Elements}, institution = it, department = tdb, year = 2011, number = {2011-004}, month = mar, abstract = {In this work we consider preconditioned iterative solution methods for numerical simulations of multiphase flow problems, modelled by the Cahn-Hilliard equation. We focus on the construction and efficiency of various preconditioning techniques and the effect of two discretization methods - conforming and non-conforming finite elements spaces - on those techniques.} } @TechReport{ it:2011-003, author = {Brian Drawert and Stefan Engblom and Andreas Hellander}, title = {{URDME} v. 1.1: User's manual}, institution = it, department = tdb, year = 2011, number = {2011-003}, month = mar, note = {The latest version of URDME can be obtained from \url{www.urdme.org}. For earlier versions of URDME consult Technical Report 2008-022.}, abstract = {We have developed URDME, a general software for simulation of stochastic reaction-diffusion processes on unstructured meshes. This allows for a more flexible handling of complicated geometries and curved boundaries compared to simulations on structured, cartesian meshes. The underlying algorithm is the next subvolume method (NSM), extended to unstructured meshes by obtaining jump coefficients from the finite element formulation of the corresponding macroscopic equation. This manual describes the changes in the software compared to the previous version, URDME 1.0. 
Notable changes include support for Comsol Multiphysics 3.5a and a stronger decoupling between core simulation routines and the Matlab interface by making core solvers stand-alone executables. This opens up for more flexible simulation workflows when many realizations of the stochastic process need to be generated.} } @TechReport{ it:2011-002, author = {Johannes Nygren and Bengt Carlsson}, title = {Benchmark Simulation Model no. 1 with a Wireless Sensor Network for Monitoring and Control}, institution = it, department = syscon, year = 2011, number = {2011-002}, month = jan, abstract = {The implementation of wireless sensor networks (WSNs) for wastewater treatment plant monitoring and control is rare in the literature. Nevertheless, there seem to be some good reasons for considering this application, since WSNs can easily supply several point measurements and keep track of a single variable over a wide area. This report describes a merged simulator, which is a fusion between a SimuLink implementation of Benchmark Simulation Model no. 1 (BSM1) and Prowler (a probabilistic WSN simulator). This makes it possible to simulate the relevant aspects of a WSN and the effects of implementing it for automatic control of a wastewater treatment plant. Also, a simple energy model is added to keep track of the WSN energy consumption.} } @TechReport{ it:2010-031, author = {Bengt Carlsson}, title = {Constant is Optimal - a Strategy for Resource Efficient Control of a Class of Processes}, institution = it, department = syscon, year = 2010, number = {2010-031}, month = dec, abstract = {In this technical report we consider the problem of how to control a class of processes in a resource/energy optimal way. The optimization is done under the constraint that the mean value of the output signal (typically an effluent discharge) should be below a certain threshold. 
A key assumption is that the magnitude of the control signal is proportional to the quantity that should be minimized (for example, consumption of energy or chemicals) and that the process efficiency decreases with increasing magnitude of the control signal. We show that for a wide class of processes it is optimal to keep the input signal constant. The result is illustrated in a simple numerical example. } } @TechReport{ it:2010-030, author = {Jens Lindstr{\"o}m and Jan Nordstr{\"o}m}, title = {Spectral Analysis of the Continuous and Discretized Heat and Advection Equation on Single and Multiple Domains}, institution = it, department = tdb, year = 2010, number = {2010-030}, month = dec, abstract = {In this paper we study the heat and advection equation in single and multiple domains. We discretize using a second order accurate finite difference method on Summation-By-Parts form with weak boundary and interface conditions. We derive analytic expressions for the spectrum of the continuous problem and for their corresponding discretization matrices. We show how the spectrum of the single domain operator is contained in the multi domain operator spectrum when artificial interfaces are introduced. We study the impact on the spectrum and discretization errors depending on the interface treatment and verify that the results are carried over to higher order accurate schemes.} } @TechReport{ it:2010-029, author = {Owe Axelsson and Maya Neytcheva}, title = {A General Approach to Analyse Preconditioners for Two-by-Two Block Matrices}, institution = it, department = tdb, year = 2010, number = {2010-029}, month = nov, abstract = {Two-by-two block matrices arise in various applications, such as in domain decomposition methods or, more generally, when solving boundary value problems discretized by finite elements from the separation of the node set of the mesh into 'fine' and 'coarse' nodes. 
Matrices with such a structure, in saddle point form arise also in mixed variable finite element methods and in constrained optimization problems. A general algebraic approach to construct, analyse and control the accuracy of preconditioners for matrices in two-by-two block form is presented. This includes both symmetric and nonsymmetric matrices, as well as indefinite matrices. The action of the preconditioners can involve element-by-element approximations and/or geometric or algebraic multigrid/multilevel methods.} } @TechReport{ it:2010-028, author = {H{\aa}kan Selg}, title = {Om professionell anv{\"a}ndning av sociala medier och andra digitala kanaler}, institution = it, department = hci, year = 2010, number = {2010-028}, month = nov, abstract = {Ny teknik i kombination med sociala processer st{\"a}ller dagens ledare inf{\"o}r en rad utmaningar. Inneh{\aa}llet i rapporten baseras p{\aa} 20 expertsamtal d{\"a}r experternas erfarenheter och intressen varit styrande. Resultatet har blivit en bred {\"o}versikt som dels kan tj{\"a}na som utg{\aa}ngspunkt f{\"o}r f{\"o}rdjupade studier, dels som ett aktuellt diskussionsunderlag f{\"o}r personer inom ber{\"o}rda verksamheter.} } @TechReport{ it:2010-027, author = {Arnold Pears and Carsten Schulte}, title = {Proceedings of the 9th Koli Calling International Conference on Computing Education Research}, institution = it, department = docs, year = 2010, number = {2010-027}, month = nov, note = {Typographical updates 2010-11-25.}, abstract = {The 9th Koli Calling conference collected together researchers from Europe, the United States and Asia-Pacific to discuss and present research which contributes to a richer education for students of computing worldwide. Working versions of the papers to be presented are distributed before the conference and authors revise and resubmit their papers after the conference, incorporating input from conference discussions. 
These final versions are published in this volume, and represent the final output of the 2009 conference. Happy reading! } } @TechReport{ it:2010-026, author = {Xin He and Maya Neytcheva and Stefano Serra Capizzano}, title = {On an Augmented {L}agrangian-Based Preconditioning of {O}seen Type Problems}, institution = it, department = tdb, year = 2010, number = {2010-026}, month = nov, abstract = {The paper deals with a general framework for constructing preconditioners for saddle point matrices, in particular as arising in the discrete linearized Navier-Stokes equations (Oseen's problem). We utilize the so-called augmented Lagrangian approach, where the original linear system of equations is first transformed to an equivalent one, which latter is then solved by a preconditioned iterative solution method. The matrices in the linear systems, arising after the discretization of Oseen's problem, are of two-by-two block form as are the best known preconditioners for these. In the augmented Lagrangian formulation, a scalar regularization parameter is involved, which strongly influences the quality of the block-preconditioners for the system matrix (referred to as 'outer'), as well as the conditioning and the solution of systems with the resulting pivot block (referred to as 'inner') which, in the case of large scale numerical simulations has also to be solved using an iterative method. We analyse the impact of the value of the regularization parameter on the convergence of both outer and inner solution methods. The particular preconditioner used in this work exploits the inverse of the pressure mass matrix. We study the effect of various approximations of that inverse on the performance of the preconditioners, in particular that of a sparse approximate inverse, computed in an element-by-element fashion. 
We analyse and compare the spectra of the preconditioned matrices for the different approximations and show that the resulting preconditioner is independent of problem, discretization and method parameters, namely, viscosity, mesh size, mesh anisotropy. We also discuss possible approaches to solve the modified pivot matrix block.} } @TechReport{ it:2010-025, author = {Soma Tayamon and Torbj{\"o}rn Wigren}, title = {Recursive Identification and Scaling of Non-linear Systems using Midpoint Numerical Integration}, institution = it, department = syscon, year = 2010, number = {2010-025}, month = oct, abstract = {A new recursive prediction error algorithm (RPEM) based on a non-linear ordinary differential equation (ODE) model of black-box state space form is presented. The selected model is discretised by a midpoint integration algorithm and compared to an Euler forward algorithm. When the algorithm is applied, scaling of the sampling time is used to improve performance further. This affects the state vector, the parameter vector and the Hessian. This impact is analysed and described in three Theorems. Numerical examples are provided to verify the theoretical results obtained.} } @TechReport{ it:2010-024, author = {Elias Rudberg and Emanuel H. Rubensson}, title = {Assessment of Density Matrix Methods for Electronic Structure Calculations}, institution = it, department = tdb, year = 2010, number = {2010-024}, month = oct, abstract = {Purification and minimization methods for computation of the one-particle density matrix are compared. This is done by considering the work needed by each method to achieve a given accuracy in terms of the difference to the exact solution. Simulations employing orthogonal as well as non-orthogonal versions of the methods are performed using both element magnitude and cutoff radius based truncation approaches. 
The results indicate that purification is considerably more efficient than the studied minimization methods even when a good starting guess for minimization is available. The computational cost of the studied minimization methods is observed to be significantly more sensitive to small band gaps than purification. An $\mathcal{O}(\sqrt{1/\xi})$ dependence on the band gap $\xi$ is observed for minimization which can be compared to the $\mathcal{O}(\ln{(1/\xi)})$ dependence for purification. Minimization is found to perform at its best at 50\% occupancy. Error control and stopping criteria are also discussed.} } @TechReport{ it:2010-023, author = {Ken Mattsson}, title = {Summation by Parts Operators for Finite Difference Approximations of Second-Derivatives with Variable Coefficients}, institution = it, department = tdb, year = 2010, number = {2010-023}, month = oct, abstract = {Finite difference operators approximating second derivatives with variable coefficients and satisfying a summation-by-parts rule have been derived for the second-, fourth- and sixth-order case by using the symbolic mathematics software Maple. The operators are based on the same norms as the corresponding approximations of the first derivative, which makes the construction of stable approximations to general multi-dimensional hyperbolic-parabolic problems straightforward.} } @TechReport{ it:2010-022, author = {Torbj{\"o}rn Wigren and Linda Brus and Soma Tayamon}, title = {{MATLAB} Software for Recursive Identification and Scaling Using a Structured Nonlinear Black-box Model - Revision 6}, institution = it, department = syscon, year = 2010, number = {2010-022}, month = sep, note = {Revised version of nr 2010-002. 
The software package can be downloaded from \url{http://www.it.uu.se/research/publications/reports/2010-022/NRISSoftwareRev6.zip}.} , abstract = {This report is intended as a users manual for a package of MATLAB scripts and functions, developed for recursive prediction error identification of nonlinear state space systems and nonlinear static systems. The core of the package is an implementation of related output error identification and scaling algorithms. The algorithms are based on a continuous time, structured black box state space model of a nonlinear system. Furthermore, to initialize the algorithm an initiation scheme based on Kalman filter theory is included. The purpose of the initialization algorithm is to find initial parameters for the prediction error algorithm, and thus reducing the risk of convergence to local false minima. An RPEM algorithm for recursive identification of nonlinear static systems, that re-uses the parameterization of the nonlinear ODE model, is also included in the software package. In this version of the software a new discretization of the continuous time model based on the midpoint integration algorithm is added. The software can only be run off-line, i.e. no true real time operation is possible. The algorithms are however implemented so that true on-line operation can be obtained by extraction of the main algorithmic loop. The user must then provide the real time environment. The software package contains scripts and functions that allow the user to either input live measurements or to generate test data by simulation. The scripts and functions for the setup and execution of the identification algorithms are somewhat more general than what is described in the references. There is e.g. support for automatic re-initiation of the algorithms using the parameters obtained at the end of a previous identification run. 
This allows for multiple runs through a set of data, something that is useful for data sets that are too short to allow convergence in a single run. The re-initiation step also allows the user to modify the degrees of the polynomial model structure and to specify terms that are to be excluded from the model. This makes it possible to iteratively re-fine the estimated model using multiple runs. The functionality for display of results include scripts for plotting of data, parameters, prediction errors, eigenvalues and the condition number of the Hessian. The estimated model obtained at the end of a run can be simulated and the model output plotted, alone or together with the data used for identification. Model validation is supported by two methods apart from the display functionality. First, a calculation of the RPEM loss function can be performed, using parameters obtained at the end of an identification run. Secondly, the accuracy as a function of the output signal amplitude can be assessed.} } @TechReport{ it:2010-021, author = {Michael Thun{\'e} and Anna Eckerdal}, title = {Students' Conceptions of Computer Programming}, institution = it, department = tdb, year = 2010, number = {2010-021}, month = sep, note = {The phenomenographic outcome space presented in this report has previously been published as part of a journal article (Thun{\'e} and Eckerdal 2009). Due to space limitations in the journal publication, we have found it appropriate to make available a more comprehensive description of the outcome space, in the present technical report.}, abstract = {The present work has its focus on university level engineering education students that do not intend to major in computer science but still have to take a mandatory programming course. Phenomenography is applied to empirical data in the form of semi-structured interviews with students who had recently taken an introductory programming course. 
A phenomenographic outcome space is presented, with five qualitatively different categories of description of students' ways of seeing computer programming.} } @TechReport{ it:2010-020, author = {Torbj{\"o}rn Wigren}, title = {Input-Output Data Sets for Development and Benchmarking in Nonlinear Identification}, institution = it, department = syscon, year = 2010, number = {2010-020}, month = aug, note = {The software package can be downloaded from \url{http://www.it.uu.se/research/publications/reports/2010-020/NonlinearData.zip}} , abstract = {This report presents two sets of data, suitable for development, testing and benchmarking of system identification algorithms for nonlinear processes. The first data set is recorded from a laboratory process that can be well described by a block oriented nonlinear model. The data set is challenging; it consists of only 500 samples, the nonlinear effect is large and the damping is not too good. The second data set is recorded from a laboratory process known to be governed by nonlinear differential equations.} } @TechReport{ it:2010-019, author = {David Ekl{\"o}v and David Black-Schaffer and Erik Hagersten}, title = {StatCC: Design and Evaluation}, institution = it, department = docs, year = 2010, number = {2010-019}, month = jun, abstract = {This work presents StatCC, a simple and efficient model for estimating the shared cache miss ratios of co-scheduled applications on architectures with a hierarchy of private and shared caches. StatCC leverages the StatStack cache model to estimate the co-scheduled applications' cache miss ratios from their individual memory reuse distance distributions, and a simple performance model that estimates their CPIs based on the shared cache miss ratios. These methods are combined into a system of equations that explicitly models the CPIs in terms of the shared miss ratios and can be solved to determine both. 
The result is a fast algorithm with a 2\% error across the SPEC CPU2006 benchmark suite compared to a simulated in-order processor and a hierarchy of private and shared caches.} } @TechReport{ it:2010-018, author = {Jeremy E. Kozdon and Eric M. Dunham and Jan Nordstr{\"o}m}, title = {Interaction of Waves with Frictional Interfaces Using Summation-By-Parts Difference Operators, 2. Extension to Full Elastodynamics}, institution = it, department = tdb, year = 2010, number = {2010-018}, month = jun, abstract = {Problems in elastodynamics with nonlinear boundary conditions, such as those arising when modeling earthquake rupture propagation along internal interfaces (faults) governed by nonlinear friction laws, are inherently boundary driven. For such problems, stable and accurate enforcement of boundary conditions is essential for obtaining globally accurate numerical solutions (and predictions of ground motion in earthquake simulations). High-order finite difference methods are a natural choice for problems like these involving wave propagation, but enforcement of boundary conditions is complicated by the fact that the stencil must transition to one-sided near the boundary. In this work we develop a high-order method for tensor elasticity with faults whose strength is a nonlinear function of sliding velocity and a set of internal state variables obeying differential evolution equations (a mathematical framework known as rate-and-state friction). The method is based on summation-by-parts finite difference operators and weak enforcement of boundary conditions using the simultaneous approximation term method. We prove that the method is strictly stable and dissipates energy at a slightly faster rate than the continuous solution (with the difference in energy dissipation rates vanishing as the mesh is refined).} } @TechReport{ it:2010-017, author = {Jeremy E. Kozdon and Eric M. 
Dunham and Jan Nordstr{\"o}m}, title = {Interaction of Waves with Frictional Interfaces Using Summation-By-Parts Difference Operators, 1. Weak Enforcement of Nonlinear Boundary Conditions}, institution = it, department = tdb, year = 2010, number = {2010-017}, month = jun, abstract = {In this work we develop a high-order method for problems in scalar elastodynamics with nonlinear boundary conditions in a form closely related to those seen in earthquake rupture modeling and other frictional sliding problems. By using summation-by-parts finite difference operators and weak enforcement of boundary conditions with the simultaneous approximation term method, a strictly stable method is developed that dissipates energy at a slightly faster rate than the continuous solution (with the difference in energy dissipation rates vanishing as the mesh is refined). Furthermore, it is shown that unless boundary conditions are formulated in terms of characteristic variables, as opposed to the physical variables in terms of which boundary conditions are more naturally stated, the semi-discretized system of equations can become extremely stiff, preventing efficient solution using explicit time integrators. These theoretical results are confirmed by several numerical tests demonstrating the high-order convergence rate of the method and the benefits of using strictly stable numerical methods for long time integration. } } @TechReport{ it:2010-016, author = {A. Rensfelt and T. S{\"o}derstr{\"o}m}, title = {Parametric Identification of Complex Modulus}, institution = it, department = syscon, year = 2010, number = {2010-016}, month = jun, abstract = {This report treats three different approaches to parametric identification of the complex modulus of a viscoelastic material. In the first approach, a nonparametric estimate is used to fit the parametric model, while in the other two the model is fitted directly from data. 
In all three cases, theoretical expressions for the accuracy of the estimate are derived under the assumption that the measurement noise is white and that the signal-to-noise ratio is large. The expressions are validated against both simulated and experimental data. In the case of experimental data it is seen that the theoretical expression severely underestimates the variance of the identified parameters, and one of the expressions is therefore modified to cover the case of correlated noise with much better agreement as a result.} } @TechReport{ it:2010-015, author = {Parosh Aziz Abdulla and Yu-Fang Chen and Giorgio Delzanno and Fr{\'e}d{\'e}ric Haziza and Chih-Duo Hong and Ahmed Rezine}, title = {Constrained Monotonic Abstraction: a {CEGAR} for Parameterized Verification}, institution = it, department = docs, year = 2010, number = {2010-015}, month = jun, abstract = {In this paper, we develop a counterexample-guided abstraction refinement (CEGAR) framework for \emph{monotonic abstraction}, an approach that is particularly useful in automatic verification of safety properties for \emph{parameterized systems}. The main drawback of verification using monotonic abstraction is that it sometimes generates spurious counterexamples. Our CEGAR algorithm automatically extracts from each spurious counterexample a set of configurations called a "Safety Zone" and uses it to refine the abstract transition system of the next iteration. 
We have developed a prototype based on this idea; and our experimentation shows that the approach allows to verify many of the examples that cannot be handled by the original monotonic abstraction approach.} } @TechReport{ it:2010-014, author = {Stefan Hellander and Per L{\"o}tstedt}, title = {Flexible Single Molecule Simulation of Reaction-Diffusion Processes}, institution = it, department = tdb, year = 2010, number = {2010-014}, month = may, abstract = {An algorithm is developed for simulation of the motion and reactions of single molecules at a microscopic level. The molecules diffuse in a solvent and react with each other or a polymer and molecules can dissociate. Such simulations are of interest e.g. in molecular biology. The algorithm is similar to the Green's function reaction dynamics (GFRD) algorithm by van Zon and ten Wolde where longer time steps can be taken by computing the probability density functions (PDFs) and then sample from its distribution function. Our computation of the PDFs is much less complicated than GFRD and more flexible. The solution of the partial differential equation for the PDF is split into two steps to simplify the calculations. The sampling is without splitting error in two of the coordinate directions for a pair of molecules and a molecule-polymer interaction and is approximate in the third direction. The PDF is obtained either from an analytical solution or a numerical discretization. The errors due to the operator splitting, the partitioning of the system, and the numerical approximations are analyzed. The method is applied to three different systems involving up to four reactions. Comparisons with other mesoscopic and macroscopic models show excellent agreement. 
} } @TechReport{ it:2010-013, author = {Jonas Boustedt}, title = {Ways to Understand Class Diagrams}, institution = it, year = 2010, number = {2010-013}, month = mar, note = {Typographic updates 2010-05-04.}, abstract = {The software industry needs well trained software designers and one important aspect of software design is the ability to model software designs visually and understand what visual models represent. However, previous research indicates that software design is a difficult task to many students. This paper reports empirical findings from a phenomenographic investigation on how students understand class diagrams, UML symbols and relations to object oriented concepts. The informants were 20 Computer Science students from four different universities in Sweden. The results show qualitively different ways to understand and describe UML class diagrams and the "diamond symbols" representing aggregation and composition. The purpose of class diagrams was understood in a varied way, from describing it as a documentation to a more advanced view related to communication. The descriptions of class diagrams varied from seeing them as a specification of classes to a more advanced view where they were described to show hierarchic structures of classes and relations. The diamond symbols were seen as "relations" and a more advanced way was seeing the white and the black diamonds as different symbols for aggregation and composition. As a consequence of the results, it is recommended that UML should be adopted in courses. It is briefly indicated how the phenomenographic results in combination with variation theory can be used by teachers to enhance students' possibilities to reach advanced understanding of phenomena related to UML class diagrams. 
Moreover, it is recommended that teachers should put more effort in assessing skills in proper using of the basic symbols and models, and students should get many opportunities to practise collaborative design, e.g., using whiteboards.} } @TechReport{ it:2010-012, author = {Jonas Boustedt}, title = {A Student Perspective on Software Development and Maintenance}, institution = it, year = 2010, number = {2010-012}, month = mar, abstract = {How do Computer Science students view Software Development and Software Maintenance? To answer this question, a Phenomenographic perspective was chosen, and 20 Swedish students at four universities were interviewed. The interviews were analyzed to find in which different ways the informants, on collective level, see the phenomena of interest. The resulting outcome spaces show that software development is described in a number of qualitatively different ways reaching from problem solving, design and deliver, design for the future and then a more comprehensive view that includes users, customers, budget and other aspects. Software maintenance is described as correcting bugs, making additions, adapting to new requirements from the surroundings, and something that is a natural part of the job. Finally, conclusions from the results and additional observations are discussed in terms of their implications for teaching, and some suggestions for practical use are given.} } @TechReport{ it:2010-011, author = {Soma Tayamon and Torbj{\"o}rn Wigren}, title = {Recursive Prediction Error Identification and Scaling of Non-linear Systems with Midpoint Numerical Integration}, institution = it, department = syscon, year = 2010, number = {2010-011}, month = mar, abstract = {A new recursive prediction error algorithm (RPEM) based on a non-linear ordinary differential equation (ODE) model of black-box state space form is presented. The selected model is discretised by a midpoint integration algorithm and compared to an Euler forward algorithm. 
When the algorithm is applied, scaling of the sampling time is used to improve performance further. This affects the state vector, the parameter vector and the Hessian. This impact is analysed and described in three Theorems. Numerical examples are provided to verify the theoretical results obtained.} } @TechReport{ it:2010-010, author = {Maya Neytcheva and Erik B{\"a}ngtsson and Elisabeth Linn{\'e}r}, title = {Finite-Element Based Sparse Approximate Inverses for Block-Factorized Preconditioners}, institution = it, department = tdb, year = 2010, number = {2010-010}, month = mar, abstract = {In this work we analyse a method to construct a numerically efficient and computationally cheap sparse approximations of some of the matrix blocks arising in the block-factorised preconditioners for matrices with a two-by-two block structure. The matrices arise from finite element discretizations of partial differential equations. We consider scalar elliptic problems, however the approach is appropriate for other types of problems such as parabolic or systems of equations. The technique is applicable for both selfadjoint and non-selfadjoint problems, in two as well as in three dimensions. We analyze in detail the 2D case and provide extensive numerical evidence for the efficiency of the proposed matrix approximations, both serial and parallel.} } @TechReport{ it:2010-009, author = {Salman Toor and Bjarte Mohn and David Cameron and Sverker Holmgren}, title = {Case-Study for Different Models of Resource Brokering in Grid Systems}, institution = it, department = tdb, year = 2010, number = {2010-009}, month = mar, abstract = {To identify the best suitable resource for a given task in a grid system, under constraints of limited available information, requires an elaborate strategy. The task gets even more complicated if the environment is heterogeneous and the availability of the resources or its information is not guaranteed for all the time. 
Efficient and reliable brokering in grid systems has been discussed extensively, and different strategies and models have been presented. However, this issue still needs more attention. In this paper we first review different brokering models, compare them and discuss the issues related to them. We have identify two key areas for improving the resource allocation: The \textbf{Information Flow} in the system and the \textbf{Data Awareness} of the resource selection. Our results show that the better management information flow between different components in the grid system significantly improves the efficiency of the resource allocation process.} } @TechReport{ it:2010-008, author = {Martins da Silva, Margarida and Teresa Mendon\c{c}a and Torbj{\"o}rn Wigren}, title = {Online Nonlinear Identification of the Effect of Drugs in Anaesthesia Using a Minimal Parameterization and {BIS} Measurements}, institution = it, department = syscon, year = 2010, number = {2010-008}, month = mar, abstract = {This paper addresses the problem of modeling and identification of the Depth of Anaesthesia (DoA). It presents a new MISO Wiener model for the pharmacokinetics and pharmacodynamics of propofol and remifentanil, when jointly administered to patients undergoing surgery. The models most commonly used to describe the effect of drugs in the human body are overparameterized Wiener models. In particular, in an anaesthesia environment, the high number of patient-dependent parameters coupled with the insufficient excitatory pattern of the input signals (drug dose profiles) and the presence of noise make robust identification strategies difficult to find. In fact, in such clinical application the user cannot freely choose the input signals to enable accurate parameter identification. A new MISO Wiener model with only four parameters is hence proposed to model the effect of the joint administration of the hypnotic propofol and the analgesic remifentanil. 
An Extended Kalman Filter (EKF) algorithm was used to perform the nonlinear online identification of the system parameters. The results show that both the new model and the identification strategy outperform the currently used tools to infer individual patient response. The proposed DoA identification scheme was evaluated in a real patient database, where the DoA is quantified by the Bispectral Index Scale (BIS) measurements. The results obtained so far indicate that the developed approach will be a powerful tool for modeling and identification of anaesthetic drug dynamics during surgical procedures.} } @TechReport{ it:2010-007, author = {Egi Hidayat and Alexander Medvedev}, title = {Parameter Estimation in a Pulsatile Hormone Secretion Model}, institution = it, department = syscon, year = 2010, number = {2010-007}, month = mar, abstract = {This paper presents an algorithm to estimate parameters of a mathematical model of a bipartite endocrine axis. Secretion of one of the involved hormones is stimulated by the concentration of another one, called release hormone, with the latter secreted in a pulsatile manner. The hormone mechanism in question appears often in animal and human endocrine systems, i.e. in the regulation of testosterone in the human male. The model has been introduced elsewhere and enables the application of the theory of pulse-modulated feedback control systems to analysis of pulsatile endocrine regulation. The state-of-the art methods for hormone secretion analysis could not be applied here due to different modeling approach. Based on the mathematical machinery of constrained nonlinear least squares minimization, a parameter estimation algorithm is proposed and shown to perform well on actual biological data yielding accurate fitting of luteinizing hormone concentration profiles. 
The performance of the algorithm is compared with that of state-of-the art techniques and appears to be good especially in case of undersampled data.} } @TechReport{ it:2010-006, author = {Carl Nettelblad and Sverker Holmgren}, title = {Stochastically Guaranteed Global Optimums Achievable with a Divide-and-Conquer Approach to Multidimensional {QTL} Searches}, institution = it, department = tdb, year = 2010, number = {2010-006}, month = mar, abstract = {The problem of searching for multiple quantitative trait loci (QTL) in an experimental cross population of considerable size poses a significant challenge, if general interactions are to be considered. Different global optimization approaches have been suggested, but without an analysis of the mathematical properties of the objective function, it is hard to devise reasonable criteria for when the optimum found in a search is truly global. We reformulate the standard residual sum of squares objective function for QTL analysis by a simple transformation, and show that the transformed function will be Lipschitz continuous in an infinite-size population, with a well-defined Lipschitz constant. We discuss the different deviations possible in an experimental finite-size population, suggesting a simple bound for the minimum value found in the vicinity of any point in the model space. Using this bound, we modify the DIRECT optimization algorithm to exclude regions where the optimum cannot be found according to the bound. This makes the algorithm more attractive than previously realized, since optimality is now in practice guaranteed. The consequences are realized in permutation testing, used to determine the significance of QTL results. DIRECT previously failed in attaining the correct thresholds. 
In addition, the knowledge of a candidate QTL for which significance is tested allows spectacular increases in permutation test performance, as most searches can be abandoned at an early stage.} } @TechReport{ it:2010-005, author = {Carl Nettelblad and {\"O}rjan Carlborg and Jos{\'e} M. lvarez-Castro}, title = {Assessing Orthogonality and Statical Properties of Linear Regression Methods for Interval Mapping with Partial Information}, institution = it, department = tdb, year = 2010, number = {2010-005}, month = mar, abstract = {\textbf{Background:} Mapping quantitative trait loci (QTL) has become a widely used tool in genetical research. In such experiments, it is desired to obtain orthogonal estimates of genetic effects for a number of reasons concerning both the biological meaning of the estimated locations and effects, and making the statistical analysis clearer and more robust. The currently used statistical methods, however, are not optimized for orthogonality, especially in cases involving interval mapping between markers and/or in incomplete datasets. This is an adverse limitation for the application of such methods for QTL scans involving model selection over putative complex gene networks. \textbf{Results:} We describe how deviations from orthogonality arise in currently used methods. We demonstrate one option for obtaining orthogonal estimates of genetic effects using multiple imputations per individual in an otherwise unchanged regression context. Our proposed IRIM method avoids inflated values for explainable variance and genetic effect variables, while showing a clear preference for marker locations in a fine mapping context. Despite possible shortcomings, similar results to linear regression are demonstrated for our proposed approach (IRIM) in an experimental dataset. \textbf{Conclusions:} Imputation-based methods can be used to enhance the statistical dissectability of effects, as well as computational performance. 
We exemplify how Haley-Knott regression is not only distorting the explainable variance, but also point out how the estimated phenotype values between classes, and the resulting effects, become dependent. This illustrates the need for a more radical departure in the approach chosen in order to achieve orthogonality.} } @TechReport{ it:2010-004, author = {Kenneth Duru and Gunilla Kreiss}, title = {A Well-posed and Discretely Stable Perfectly Matched Layer for Elastic Wave Equations in Second Order Formulation}, institution = it, department = tdb, year = 2010, number = {2010-004}, month = feb, abstract = {We present a well-posed and discretely stable perfectly matched layer for the anisotropic (and isotropic) elastic wave equations without first re-writing the governing equations as a first order system. The new model is derived by the complex coordinate stretching technique. Using standard perturbation methods we show that complex frequency shift together with a chosen real scaling factor ensures the decay of eigen-modes for all relevant frequencies. To buttress the stability properties and the robustness of the proposed model, numerical experiments are presented for anisotropic elastic wave equations. The model is approximated with a stable node-centered finite difference scheme that is second order accurate both in time and space.} } @TechReport{ it:2010-003, author = {Andreas Hellander and Per L{\"o}tstedt}, title = {Incorporating Active Transport of Cellular Cargo in Stochastic Mesoscopic Models of Living Cells}, institution = it, department = tdb, year = 2010, number = {2010-003}, month = feb, abstract = {We propose a new multiscale method to incorporate active transport of cargo particles in biological cells in stochastic, mesoscopic models of reaction-transport processes. 
Given a discretization of the computational domain, we find stochastic, convective mesoscopic molecular fluxes over the edges or facets of the subvolumes and relate the process to a corresponding first order finite volume discretization of the linear convection equation. We give an example of how this can be used to model active transport of cargo particles on a microtubule network by the motor proteins kinesin and dynein. In this way we extend mesoscopic reaction-diffusion models of biochemical reaction networks to more general models of molecular transport within the living cell.} } @TechReport{ it:2010-002, author = {Torbj{\"o}rn Wigren and Linda Brus and Soma Tayamon}, title = {{MATLAB} Software for Recursive Identification and Scaling Using a Structured Nonlinear Black-box Model - Revision 5}, institution = it, department = syscon, year = 2010, number = {2010-002}, month = jan, note = {Revised version of nr 2008-007. The software package can be downloaded from \url{http://www.it.uu.se/research/publications/reports/2010-002/NRISSoftwareRev5.zip}. \textbf{Note} that the software package was updated on 2010-03-16.}, abstract = {This report is intended as a users manual for a package of MATLAB scripts and functions, developed for recursive prediction error identification of nonlinear state space systems and nonlinear static systems. The core of the package is an implementation of an output error identification and scaling algorithm. The algorithm is based on a continuous time, structured black box state space model of a nonlinear system. Furthermore, to initialize the algorithm an algorithm based on Kalman filter theory is included. The purpose of the initialization algorithm is to find initial parameters for the prediction error algorithm, and thus reducing the risk of convergence to local false minima. 
An RPEM algorithm for recursive identification of nonlinear static systems, that re-uses the parameterization of the nonlinear ODE model, is also included in the software package. In this version of the software the discretization of the continuous time model is based on the midpoint integration algorithm. The software can only be run off-line, i.e. no true real time operation is possible. The algorithms are however implemented so that true on-line operation can be obtained by extraction of the main algorithmic loop. The user must then provide the real time environment. The software package contains scripts and functions that allow the user to either input live measurements or to generate test data by simulation. The scripts and functions for the setup and execution of the identification algorithms are somewhat more general than what is described in the references. There is e.g. support for automatic re-initiation of the algorithms using the parameters obtained at the end of a previous identification run. This allows for multiple runs through a set of data, something that is useful for data sets that are too short to allow convergence in a single run. The re-initiation step also allows the user to modify the degrees of the polynomial model structure and to specify terms that are to be excluded from the model. This makes it possible to iteratively re-fine the estimated model using multiple runs. The functionality for display of results include scripts for plotting of data, parameters, prediction errors, eigenvalues and the condition number of the Hessian. The estimated model obtained at the end of a run can be simulated and the model output plotted, alone or together with the data used for identification. Model validation is supported by two methods apart from the display functionality. First, a calculation of the RPEM loss function can be performed, using parameters obtained at the end of an identification run. 
Secondly, the accuracy as a function of the output signal amplitude can be assessed.} } @TechReport{ it:2010-001, author = {Mahen Jayawardena and Salman Toor and Sverker Holmgren}, title = {Computational and Visualization tools for Genetic Analysis of Complex Traits}, institution = it, department = tdb, year = 2010, number = {2010-001}, month = jan, abstract = {We present grid based tools for simultaneous mapping of multiple locations (QTL) in the genome that affect quantitative traits (e.g. body weight, blood pressure) in experimental populations. The corresponding computational problem is very computationally intensive. We have earlier shown that, using appropriate parallelization schemes, this type of application is suitable for deployment on grid systems. A grid portal interface is often an ideal tool for biologists performing advanced genetic analysis. We describe an implementation of such a portal system and how it is used for performing multidimensional QTL searches efficiently.} } @TechReport{ it:2009-027, author = {Jens Lindstr{\"o}m and Jan Nordstr{\"o}m}, title = {A Stable and High Order Accurate Conjugate Heat Transfer Problem}, institution = it, department = tdb, year = 2009, number = {2009-027}, month = nov, abstract = {This paper analyzes well-posedness and stability of a conjugate heat transfer problem in one space dimension. We study a model problem for heat transfer between a fluid and a solid. The energy method is used to derive boundary and interface conditions that make the continuous problem well-posed and the semi-discrete problem stable. The numerical scheme is implemented using 2nd, 3rd and 4th order finite difference operators on Summation-By-Parts (SBP) form. The boundary and interface conditions are implemented weakly. We investigate the spectrum of the spatial discretization to determine which type of coupling that gives attractive convergence properties. 
The rate of convergence is verified using the method of manufactured solutions.} } @TechReport{ it:2009-026, author = {Sara Zahedi and Martin Kronbichler and Gunilla Kreiss}, title = {Spurious Currents in a Finite-Element Based Level Set Method for Two Phase Flow}, institution = it, department = tdb, year = 2009, number = {2009-026}, month = nov, abstract = {A study of spurious currents in finite element based simulations of the incompressible Navier-Stokes equations for two phase flows is presented, based on computations on a circular drop in equilibrium. The interface is accounted for by a level set method. It is shown that a sharp surface tension force, expressed as a line integral along the interface, can give rise to large spurious currents and oscillations in the pressure. If instead a regularized surface tension representation is used, exact force balance at the interface is possible, both for a fully coupled discretization approach as well as for a fractional step projection method. We illustrate that with exact force balance, the spurious currents are of the order of the tolerance of the linear solver. However, the numerical curvature calculation introduces errors, that cause spurious currents. Different ways to extend the curvature from the interface to the whole domain are discussed and investigated. It is shown that the choice of curvature extension has a significant impact on the error in pressure. The impact of using different finite element spaces is also investigated.} } @TechReport{ it:2009-025, author = {Olof Rensfelt and Frederik Hermansand Christofer Ferm and Per Gunningberg and Lars-{\AA}ke Larzon}, title = {Sensei-{UU}: A Nomadic Sensor Network Testbed Supporting Mobile Nodes}, institution = it, department = docs, year = 2009, number = {2009-025}, month = oct, abstract = {We present Sensei - a nomadic, or relocatable, wireless sensor network (WSN) testbed with support for mobile nodes. 
The nomadism makes it possible to evaluate a WSN application in different environments ranging from lab environment to in-situ installations to prototype deployments. Other WSN testbeds are often static and can not be easily moved between sites. To be easily relocateable and highly flexible, Sensei uses a wireless 802.11 b/g network as control channel. Our design with a wireless control channel allows easy incorporation of mobile nodes. Since sensor nodes often use 802.15.4 (ZigBee) communication, we have investigated the interference between the control channel and a 802.15.4 WSN to ensure that this approach is a feasible approach. For repeatability in terms of mobility, nodes can be carried either by humans or robots following mobility scripts. We present a method for localization of mobile nodes based on robot technology. The robots use laser range finders for localization and navigate in a predefined map. We evaluate the precision of this method in an office like environment to ensure sufficient repeatability of mobility experiments.} } @TechReport{ it:2009-024, author = {Iordanis Kavathatzopoulos}, title = {{AvI}-index: A tool to assess usability}, institution = it, department = hci, year = 2009, number = {2009-024}, month = oct, abstract = {AvI-index is a usability measurement questionnaire to assess IT systems~ usability as a factor dependent on efficiency, effectiveness and work environment. It is focused on how personal skills and organizational processes contribute to successful user participation and through that to higher usability, to better work environment, and to higher effectiveness and efficiency. Reliability coefficients and correlations to objective criteria were high confirming the original hypothesis. AvI-index can be used to acquire information about an IT system~s usability in an easy and quick way. It can be used to evaluate interventions and changes of IT systems. 
It is also a suitable method to apply continuously over a longer period of time. Although AvI-index only provides an indicative value, such a diagnosis of usability in an organization~s IT infrastructure is valuable as an alert and to determine the extent of further initiatives. } } @TechReport{ it:2009-023, author = {Martins da Silva, Margarida and Torbj{\"o}rn Wigren and Teresa Mendonca}, title = {Nonlinear Identification of a Minimal NeuroMuscular Blockade Model in Anaesthesia}, institution = it, department = syscon, year = 2009, number = {2009-023}, month = sep, abstract = {The paper presents new modeling and identification strategies to address the many difficulties in the identification of anaesthesia dynamics. During general anaesthesia procedures muscle relaxants are drugs frequently administered. The most commonly used models for the effect of such drugs, called NeuroMuscular Blockade (NMB), comprise a high number (greater than eight) of pharmacokinetic and pharmacodynamic (PK/PD) parameters. The main issue concerning the NMB system identification is that, in the clinical practice, the user cannot freely choose the system input signals (drug dose profiles to be administered to the patients) to enable the identification of such a high number of parameters. The limited amount of measurement data also indicates a need for new identification strategies. A new SISO Wiener model with two parameters is hence proposed to model the effect of the muscle relaxant atracurium. A batch Prediction Error Method (PEM) was first developed to optimize the model structure. Secondly, an Extended Kalman Filter (EKF) approach was used to perform the online identification of the system parameters. Both approaches outperform conventional identification strategies, showing good results regarding parameter identification and measured signal tracking, when evaluated on a large patient database. 
The new methods proved to be adequate for the description of the system, even with the poor input signal excitation and the few measured data samples present in this application. It turns out that the methods are of general validity for the identification of drug dynamics in the human body.} } @TechReport{ it:2009-022, author = {Katharina Kormann and Sverker Holmgren and Hans O. Karlsson}, title = {A {F}ourier-Coefficient Based Solution of an Optimal Control Problem in Quantum Chemistry}, institution = it, department = tdb, year = 2009, number = {2009-022}, month = sep, abstract = {We consider an optimal control problem for the time-dependent Schr{\"o}dinger equation modeling molecular dynamics. Given a molecule in its ground state, the interaction with a tuned laser pulse can result in an excitation to a state of interest. By these means, one can optimize the yield of chemical reactions. The problem of designing an optimal laser pulse can be posed as an optimal control problem. We reformulate the optimization problem by Fourier-transforming the electric field of the laser and narrow the frequency band. In this way, we reduce the dimensionality of the control variable. This allows for storing an approximate Hessian and, thereby, we can solve the optimization problem with a quasi-Newton method. Such an implementation provides superlinear convergence. We show computational results for a Raman-transition example and give numerical evidence that our algorithm can outperform the standard Krotov-like method which does not employ approximative second derivatives. \end{abstract} } } @TechReport{ it:2009-021, author = {Katharina Kormann and Sverker Holmgren and Hans O. 
Karlsson}, title = {Global Error Control of the Time-Propagation for the {S}chr{\"o}dinger Equation with a Time-Dependent {H}amiltonian}, institution = it, department = tdb, year = 2009, number = {2009-021}, month = sep, abstract = {We use a posteriori error estimation theory to derive a relation between local and global error in the propagation for the time-dependent Schr\"odinger equation. Based on this result, we design a class of $h,p$-adaptive Magnus--Lanczos propagators capable of controlling the global error of the time-stepping scheme by only solving the equation once. We provide results for models of several different small molecules including bounded and dissociative states, illustrating the efficiency and wide applicability of the new methods. } } @TechReport{ it:2009-020, author = {Bengt Fornberg and Elisabeth Larsson and Natasha Flyer}, title = {Stable Computations with {G}aussian Radial Basis Functions in {2-D}}, institution = it, department = tdb, year = 2009, number = {2009-020}, month = aug, abstract = {Radial basis function (RBF) approximation is an extremely powerful tool for representing smooth functions in non-trivial geometries, since the method is meshfree and can be spectrally accurate. A perceived practical obstacle is that the interpolation matrix becomes increasingly ill-conditioned as the RBF shape parameter becomes small, corresponding to flat RBFs. Two stable approaches that overcome this problem exist, the Contour-Pad\'e method and the RBF-QR method. However, the former is limited to small node sets and the latter has until now only been formulated for the surface of the sphere. This paper contains an RBF-QR formulation for planar two-dimensional problems. The algorithm is perfectly stable for arbitrarily small shape parameters and can be used for up to a thousand node points in double precision and for several thousand node points in quad precision. 
A sample MATLAB code is provided.} } @TechReport{ it:2009-019, author = {David Ekl{\"o}v and Erik Hagersten}, title = {Statstack: Efficient Modeling of {LRU} Caches}, institution = it, department = docs, year = 2009, number = {2009-019}, month = jul, abstract = {The identification of the memory gap in terms of the relatively slow memory accesses put a focus on cache performance in the 90s. The introduction of the moderately clocked multicores has shifted this focus from memory latency to memory bandwidth for modern processors. The multicore's limited cache capacity per thread in combination with their current a projected off-chip memory bandwidth limitation makes this the most likely bottleneck of future computer systems. This paper presents a new and efficient way of estimating the cache performance for an application. The method has several similarities with that of Stack Distance, but instead of counting \emph{unique memory objects}, as is done for Stack Distance calculations, our schema only requires the \emph{number of memory accesses} to be counted between two successive accesses to the same data object. This task can be efficiently handled at runtime by existing built-in hardware counters. Furthermore, only a small fraction of the memory accesses have to be monitored for an accurate estimation. We show how low-overhead runtime data, similar to that of StatCache, is sufficient to feed this model. We evaluate the accuracy of the proposed transformation based on sparse data and compare the results with that of native stack distance based all memory accesses. We show excellent accuracy over a wide range of cache sizes and applications. 
} } @TechReport{ it:2009-018, author = {Parosh Aziz Abdulla and Muhsin Atto and Jonathan Cederberg and Ran Ji}, title = {Automated Analysis of Data-Dependent Programs with Dynamic Memory}, institution = it, department = docs, year = 2009, number = {2009-018}, month = jun, abstract = {We present a new approach for automatic verification of data-dependent programs manipulating dynamic heaps. A heap is encoded by a graph where the nodes represent the cells, and the edges reflect the pointer structure between the cells of the heap. Each cell contains a set of variables which range over the natural numbers. Our method relies on standard backward reachability analysis, where the main idea is to use a simple set of predicates, called signatures, in order to represent bad sets of heaps. Examples of bad heaps are those which contain either garbage, lists which are not well-formed, or lists which are not sorted. We present the results for the case of programs with a single next-selector, and where variables may be compared for (in)equality. This allows us to verify for instance that a program, like bubble sort or insertion sort, returns a list which is well-formed and sorted, or that the merging of two sorted lists is a new sorted list. We report on the result of running a prototype based on the method on a number of programs.} } @TechReport{ it:2009-017, author = {Anna Nissen and Gunilla Kreiss}, title = {An Optimized Perfectly Matched Layer for the {S}chr{\"o}dinger Equation}, institution = it, department = tdb, year = 2009, number = {2009-017}, month = jun, abstract = {A perfectly matched layer (PML) for the Schr{\"o}dinger equation using a modal ansatz is presented. We derive approximate error formulas for the modeling error from the outer boundary of the PML and for the error from the discretization and show how these can be matched in order to obtain optimal performance of the PML. 
Included numerical results show that the PML works efficiently at a prescribed accuracy for the zero potential case, with a layer of width less than two percent of the computational domain.} } @TechReport{ it:2009-016, author = {Torsten S{\"o}derstr{\"o}m}, title = {Expressions for the Covariance Matrix of Covariance Data}, institution = it, department = syscon, year = 2009, number = {2009-016}, month = may, abstract = {In several estimation methods used in system identification, a first step is to estimate the covariance functions of the measured inputs and outputs for a small set of lags. These covariance elements can be set up as a vector. The report treats the problem of deriving and computing the asymptotic covariance matrix of this vector, when the number of underlying input-output data is large. The algorithm is derived under fairly general assumptions. It is assumed that the input and output are linked through a linear finite-order system. Further, the input is assumed to be modelled as an ARMA model of a fixed, but arbitrary order. Finally, it is allowed that both the input and the output are not measured directly, but with some white measurement noise, thus including typical errors-in-variables situations in the analysis.} } @TechReport{ it:2009-015, author = {H{\aa}kan Selg}, title = {Tv{\aa} kulturer p{\aa} Internet: Resultat av faktor- och klusteranalys}, institution = it, department = hci, year = 2009, number = {2009-015}, month = may, note = {In Swedish.}, abstract = {Bland avancerade anv{\"a}ndare av Internet intar studenter en nyckelroll. Att studera deras beteenden p{\aa} n{\"a}tet g{\"o}r det m{\"o}jligt f{\"o}r oss att tidigt uppt{\"a}cka nya tendenser och att f{\"o}rst{\aa} vad som ligger framf{\"o}r oss p{\aa} n{\aa}gra {\aa}rs sikt. Vidare utg{\"o}r studenterna en stor grupp i samh{\"a}llet som snart kommer att f{\"o}ra sin nyvunna kunskap och sitt t{\"a}nkande in i arbetslivet. 
En enk{\"a}t riktad till studenter, doktorander och anst{\"a}llda vid 32 svenska universitet och h{\"o}gskolor genomf{\"o}rdes under h{\"o}sten 2007. Enk{\"a}tsvaren analyserades genom korstabuleringar, faktoranalys och klusteranalys. Ett framtr{\"a}dande resultat var att tv{\aa} olika anv{\"a}ndarm{\"o}nster bland Internetanv{\"a}ndarna tydligt gick att urskilja. Skillnaden {\"a}r av s{\aa}dan storlek att vi kan tala om tv{\aa} kulturer. Den ena kulturen, k{\"a}nnetecknas av env{\"a}gskommunikation, som att s{\"o}ka information, ta del av nyheter, betala r{\"a}kningar och best{\"a}lla resor. Kommunikationen {\"a}r ~vertikal~ och knyter samman f{\"o}retag med sina kunder, och myndigheter med medborgare. Det dominerande kommunikationsverktyget {\"a}r e-post. Den andra kulturen utm{\"a}rks av tv{\aa}v{\"a}gskommunikation d{\"a}r anv{\"a}ndarna samtidigt {\"a}r konsumenter av information och producenter av inneh{\aa}ll i n{\"a}tverk. Detta {\"a}r en v{\"a}rld av n{\"a}tgemenskaper, bloggar och andra sociala mjukvaror. Kommunikationen {\"a}r ~horisontell~ och med olika protokoll f{\"o}r \emph{instant messaging} (IM) som typiskt verktyg f{\"o}r kommunikation. Rapporten finns {\"a}ven publicerad i Nationellt IT-anv{\"a}ndarcentrums (NITA) rapportserie fr{\aa}n projektet InternetExplorers som delrapport 9.} } @TechReport{ it:2009-014, author = {Mark H. Carpenter and Jan Nordstr{\"o}m and David Gottlieb}, title = {Revisiting and Extending Interface Penalties for Multi-Domain Summation-by-Parts Operators}, institution = it, department = tdb, year = 2009, number = {2009-014}, month = may, abstract = {A general interface procedure is presented for multi-domain collocation methods satisfying the summation-by-parts (SBP) spatial discretization convention. Unlike more traditional operators (e.g. FEM) applied to the advection-diffusion equation, the new procedure penalizes the solution and the first p derivatives across the interface. 
The combined interior/interface operators are proven to be pointwise stable, and conservative, although accuracy deteriorates for $p \geq 2$. Penalties between two different sets of variables are compared (motivated by FEM primal and flux formulations), and are shown to be equivalent for certain choices of penalty parameters. Extensive validation studies are presented using two classes of high-order SBP operators: 1) central finite difference, and 2) Legendre spectral collocation.} } @TechReport{ it:2009-013, author = {Jim Wilenius}, title = {Combinatorial and Simultaneous Auction: A Pragmatic Approach to Tighter Bounds on Expected Revenue}, institution = it, department = csd, year = 2009, number = {2009-013}, month = may, note = {Updated May 28, 2009.}, abstract = {It is a common belief that combinatorial auctions provide good solutions to resource-allocation in multiple-object markets with synergies. In this work we adopt a pragmatic approach to examining the revenue bounds on combinatorial and simultaneous auctions. The theoretical bounds from our previous work utilize a large number of bidders in order to show that combinatorial auctions yield a higher expected revenue. It is reasonable to believe that the true bounds are much tighter. We argue that this is indeed the case and that the first-price combinatorial auction is revenue superior even when a relatively small number of bidders participate. The argument is based on three methods. 
(i) heuristic equilibrium-strategy search, (ii) sampling of the expected revenue in the first-price sealed-bid combinatorial auction, and (iii) a tightened theoretical upper bound on the sealed-bid simultaneous auction in the case of few bidders.} } @TechReport{ it:2009-012, author = {Bj{\"o}rn Halvarsson and Bengt Carlsson}, title = {New Input/Output Pairing Strategies based on Minimum Variance Control and Linear Quadratic Gaussian Control}, institution = it, department = syscon, year = 2009, number = {2009-012}, month = may, note = {Updated June 2009.}, abstract = {In this paper a new input/output pairing strategy based on minimum variance control is proposed. A similar version based on linear quadratic Gaussian (LQG) control is also suggested. The strategies are used to compare the expected performance of decentralized control structures in some illustrative examples. The pairing suggestions are compared with the recommendations previously obtained using other interaction measures such as the Relative Gain Array (RGA). The new strategies give suitable pairing recommendations and are easy to interpret.} } @TechReport{ it:2009-011, author = {Jan Nordstr{\"o}m and Sofia Eriksson}, title = {Well Posed, Stable and Weakly Coupled Fluid Structure Interaction Problems}, institution = it, department = tdb, year = 2009, number = {2009-011}, month = apr, abstract = {We investigate problems of fluid structure interaction type and aim for a formulation that leads to a well posed problem and a stable numerical procedure. Our first objective is to investigate if the generally accepted formulations of the FSI problems are the only possible ones. Our second objective is to derive a numerical coupling which is truly stable. To accomplish that we will use a weak coupling procedure and employ summation- by-parts operators and penalty terms. We compare the weak coupling with other common procedures. We also study the effect of high order accurate schemes. 
In multiple dimensions this is a formidable task and for that reason we start by investigating the simplest possible model problem available. As a flow model we use the linearized Euler equations in one dimension and as the structure model we consider a spring.} } @TechReport{ it:2009-010, author = {Lars Ferm and Andreas Hellander and Per L{\"o}tstedt}, title = {An Adaptive Algorithm for Simulation of Stochastic Reaction-Diffusion Processes}, institution = it, department = tdb, year = 2009, number = {2009-010}, month = apr, abstract = {We propose an adaptive hybrid method suitable for stochastic simulation of diffusion dominated reaction-diffusion processes. For such systems, simulation of the diffusion requires the predominant part of the computing time. In order to reduce the computational work, the diffusion in parts of the domain is treated macroscopically, in other parts with the tau-leap method and in the remaining parts with Gillespie's stochastic simulation algorithm (SSA) as implemented in the next subvolume method (NSM). The chemical reactions are handled by SSA everywhere in the computational domain. A trajectory of the process is advanced in time by an operator splitting technique and the time steps are chosen adaptively. The spatial adaptation is based on estimates of the errors in the tau-leap method and the macroscopic diffusion. The accuracy and efficiency of the method are demonstrated in examples from molecular biology where the domain is discretized by unstructured meshes.} } @TechReport{ it:2009-009, author = {Sofia Eriksson and Jan Nordstr{\"o}m}, title = {Analysis of the Order of Accuracy for Node-Centered Finite Volume Schemes}, institution = it, department = tdb, year = 2009, number = {2009-009}, month = mar, abstract = {The order of accuracy of the node-centered finite volume methods is analyzed, and the analysis is based on an exact derivation of the numerical errors in one dimension. The accuracy for various types of grids are considered. 
Numerical simulations and analysis are performed for both a hyperbolic and an elliptic case, and the results agree. The impact of weakly imposed boundary conditions is analyzed and verified numerically. We show that the error contribution from the primal and dual grid can be treated separately.} } @TechReport{ it:2009-008, author = {Henrik Johansson}, title = {A Meta-Partitioner for Run-Time Selection and Evaluation of Multiple Partitioning Algorithms for {SAMR} Grid Hierarchies}, institution = it, department = tdb, year = 2009, number = {2009-008}, month = mar, abstract = {Parallel structured adaptive mesh refinement (SAMR) methods increase the efficiency of the numerical solution to partial differential equations. These methods use an adaptive grid hierarchy to dynamically assign computational resources to areas with large solution errors. The grid hierarchy needs to be repeatedly re-partitioned and distributed over the processors but no single partitioning algorithm performs well for all hierarchies. This paper presents an extended and improved version of the Meta-Partitioner, a partitioning framework that uses the state of the application to autonomously select, configure, invoke, and evaluate partitioning algorithms during run-time. The performance of the partitioning algorithms is predicted using historical performance data for grid hierarchies similar to the current hierarchy. At each re-partitioning, a user-specified number of partitioning algorithms are selected and invoked. When multiple partitionings are constructed, the performance of each partitioning is evaluated during run-time and the best partitioning is selected. The performance evaluation shows huge improvements for the two most performance-inhibiting factors --- the load imbalance and the synchronization delays. 
On average, the load imbalance is increased by only 11.5\% and the synchronization delays by 13.6\% compared to the optimal results from 768 different hybrid partitioning algorithms.} } @TechReport{ it:2009-007, author = {Henrik Johansson}, title = {Run-Time Selection of Partitioning Algorithms for Parallel {SAMR} Applications}, institution = it, department = tdb, year = 2009, number = {2009-007}, month = mar, abstract = {Parallel structured adaptive mesh refinement methods decrease the execution time and memory requirements of partial differential equation solvers. These methods result in an adaptive and dynamic grid hierarchy that repeatedly needs to be re-partitioned and distributed over the processors. No single partitioning algorithm can consistently construct high-quality partitionings for all possible grid hierarchies. Instead, the partitioning algorithm needs to be selected during run-time. In this paper, an initial implementation of the Meta-Partitioner is presented. At each re-partitioning, the Meta-Partitioner autonomously selects, configures, and invokes the partitioning algorithm predicted to result in the best performance. To predict the performance of the partitioning algorithms, the Meta-Partitioner uses historic performance data for grid hierarchies with properties similar to the current hierarchy. The Meta-Partitioner focuses the partitioning effort on the most performance-inhibiting factor --- either the load imbalance or the synchronization delays. The performance evaluation shows a small but noticeable performance increase compared to the best static algorithm. Compared to the average performance for a large number of partitioning algorithms, the Meta-Partitioner consistently generates partitionings with a significantly better performance. 
} } @TechReport{ it:2009-006, author = {Jan Nordstr{\"o}m and Jing Gong and van der Weide, Edwin and Magnus Sv{\"a}rd}, title = {A Stable and Conservative High Order Multi-block Method for the Compressible Navier-Stokes Equations}, institution = it, department = tdb, year = 2009, number = {2009-006}, month = feb, abstract = {A stable and conservative high order multi-block method for the time-dependent compressible Navier-Stokes equations has been developed. Stability and conservation are proved using summation-by-parts operators, weak interface conditions and the energy method. This development makes it possible to exploit the efficiency of the high order finite difference method for non-trivial geometries. The computational results corroborate the theoretical analysis.} } @TechReport{ it:2009-005, author = {Sven-Olof Nystr{\"o}m}, title = {Ideas for a new Erlang}, institution = it, department = csd, year = 2009, number = {2009-005}, month = feb, abstract = {This paper presents some thoughts and ideas on the future development of Erlang. Among the topics are: an alternative to Erlang's selective receive, a simple language mechanism to allow function in-lining across module boundaries, a new mechanism for introducing local variables with a more cleanly defined semantics, and a mini-language to allow the efficient implementation of low-level algorithms.} } @TechReport{ it:2009-004, author = {Arnold Pears and Lauri Malmi}, title = {The 8th Koli Calling International Conference on Computing Education Research}, institution = it, year = 2009, number = {2009-004}, month = feb, note = {Updated June 2009.}, abstract = {The contents of this volume are the culmination of nearly a year of planning and effort on the part of both the local organising committee and the conference chairs. However, we were not working alone! Without an active community of researchers doing quality research and writing papers, a conference like Koli has no function or purpose. 
Consequently, a large part of the success of Koli Calling lies in its vibrant research community. It is your submissions that have made it possible for us to select this year's crop of interesting and thought provoking contributions.} } @TechReport{ it:2009-003, author = {Erik Nordstr{\"o}m and Per Gunningberg and Christian Rohner}, title = {A Search-based Network Architecture for Mobile Devices}, institution = it, department = docs, year = 2009, number = {2009-003}, month = jan, abstract = {This paper presents the Haggle network architecture and experimental measurements of its performance in a realistic environment. Haggle provides a search-based data dissemination framework for mobile opportunistic communication environments, making it easy to share content directly between intermittently connected mobile devices. Haggle's novel approach is based on its identification of search as a first class operation for data-centric applications. We show how search can be used for resolution (mapping data to interested receivers) and prioritization of sending and receiving data during encounters between nodes. Haggle provides underlying functionality for neighbor discovery, resource management and resolution -- thus removing the need to implement such features in applications. Haggle has been implemented for several platforms. This paper presents experimental results, the most interesting of which demonstrates the live operation of Haggle on mobile phones in an office environment.} } @TechReport{ it:2009-002, author = {Anna Eckerdal}, title = {Ways of Thinking and Practising in Introductory Programming}, institution = it, department = tdb, year = 2009, number = {2009-002}, abstract = {In computer programming education it is generally acknowledged that students learn practical skills and concepts largely by practising. 
In addition it is widely reported that many students face great difficulties in their learning, despite great efforts during many decades to improve programming education. The paper investigates and discusses the relation between novice computer programming students' conceptual and practical learning. To this end the present research uses Ways of Thinking and Practising, WTP as a theoretical framework. In the present research Thinking is discussed in terms of students' learning of concepts, while Practising is discussed as common novice students' programming activities. Based on two empirical studies it is argued that there exists a mutual and complex dependency between conceptual learning and practise in students' learning process. It is hard to learn one without the other, and either of them can become an obstacle that hinders further learning. Empirical findings point to the need to research the relationship between conceptual understanding and practise to better understand students' learning process. The paper demonstrates a way to research how students' learning of practise and concepts are related. Results from a phenomenographic analysis on novice programming students' understanding of some central concepts are combined with an analysis based on elements from variation theory of the students' programming activities. It is shown that different levels of proficiency in programming activities as well as qualitatively different levels of conceptual understandings are related to dimensions of variation. The dimensions of variation serve as interfaces between the activities and conceptual understandings. If a dimension is discerned, this can facilitate coming to richer conceptual understandings and learning additional activities. 
} } @TechReport{ it:2009-001, author = {Arne Andersson and Jim Wilenius}, title = {A New Analysis of Revenue in the Combinatorial and Simultaneous Auction}, institution = it, department = csd, year = 2009, number = {2009-001}, month = jan, note = {Updated May 2009.}, abstract = {We prove that in many cases, a first-price sealed-bid combinatorial auction gives higher expected revenue than a sealed-bid simultaneous auction. This is the first theoretical evidence that combinatorial auctions indeed generate higher revenue, which has been a common belief for decades. We use a model with many bidders and items, where bidders are of two types: (i) single-bidders interested in only one item and (ii) synergy-bidders, each interested in one random combination of items. We provide an upper bound on the expected revenue for simultaneous auctions and a lower bound on combinatorial auctions. Our bounds are parameterized on the number of bidders and items, combination size, and synergy. We derive an asymptotic result, proving that as the number of bidders approach infinity, expected revenue of the combinatorial auction will be higher than that of the simultaneous auction. We also provide concrete examples where the combinatorial auction is revenue-superior.} } @TechReport{ it:2008-026, author = {Bj{\"o}rn Holmberg}, title = {Stereoscopic Estimation of Surface Movement from Inter-Frame Matched Skin Texture}, institution = it, department = syscon, year = 2008, number = {2008-026}, month = oct, abstract = {Marker-less human motion analysis is currently a hot topic in the research community. In this study three dimensional motion of a human limb is estimated using a large number of matched skin texture image patches. These two dimensional matches are triangulated and also matched to the next time frame. With these matched three dimensional points a Least Squares estimate of the rigid body motion obtained using standard methods. 
This motion estimate is subsequently compared to a marker based estimate acquired from a synchronized marker system. The results show that this approach can be used for motion estimation but with less accurate results than the marker based system that is to be considered as the clinically used standard. However, the correlation surfaces indicate that the method has potential if for example subpixel correlation algorithms were to be employed. } } @TechReport{ it:2008-025, author = {Bj{\"o}rn Holmberg}, title = {High Dimensional Human Motion Estimation using Particle Filtering}, institution = it, department = syscon, year = 2008, number = {2008-025}, month = oct, abstract = {An anatomical model of a human thigh and shank segment is built. This 19 degree of freedom model is used in a particle filtering implementation to estimate the model state based on simulated data. The novelty of this paper is in the use of the particle filter with such a high dimensional model as well as the application on a new type of data. This new data type is inter-frame matched 3D points on the skin surface, based on triangulation. The results are very encouraging in comparison to state of the art contributions. The present implementation is very demanding in terms of computations and hence does not lend itself to real time applications.} } @TechReport{ it:2008-024, author = {Therese Bohlin and Bengt Jonsson}, title = {Regular Inference for Communication Protocol Entities}, institution = it, department = docs, year = 2008, number = {2008-024}, month = sep, abstract = {Existing algorithms for regular inference (aka automata learning) allow one to infer a finite state machine model of a system under test (SUT) by observing the output that the SUT produces in response to selected sequences of input. In this paper we present an approach using regular inference to construct models of communication protocol entities. 
Entities of communication protocols typically take input messages in the format of a protocol data unit (PDU) type together with a number of parameters and produce output of the same format. We assume that parameters from input can be stored in state variables of communication protocols for later use. A model of a communication protocol is usually structured into control states. Our goal is to infer symbolic extended finite state machine models of communication protocol entities with control states in the model that are similar to the control states in the communication protocol. In our approach, we first apply an existing regular inference algorithm to a communication protocol entity to generate a finite state machine model of the entity. Thereafter we fold the generated model into a symbolic extended finite state machine model with locations and state variables. We have applied parts of our approach to an executable specification of the Mobile Arts Advanced Mobile Location Center (A-MLC) protocol and evaluated the results.} } @TechReport{ it:2008-023, author = {Pierre Flener and Justin Pearson and Meinolf Sellmann}, title = {Static and Dynamic Structural Symmetry Breaking}, institution = it, department = csd, year = 2008, number = {2008-023}, month = sep, abstract = {We reconsider the idea of structural symmetry breaking for constraint satisfaction problems (CSPs). We show that the dynamic dominance checks used in symmetry breaking by dominance-detection search for CSPs with piecewise variable and value symmetries have a static counterpart: there exists a set of constraints that can be posted at the root node and that breaks all the compositions of these (unconditional) symmetries. The amount of these symmetry-breaking constraints is linear in the size of the problem, and yet they are able to remove a super-exponential number of symmetries on both values and variables. 
Moreover, we compare the search trees under static and dynamic structural symmetry breaking when using fixed variable and value orderings. These results are then generalised to wreath-symmetric CSPs with both variable and value symmetries. We show that there also exists a polynomial-time dominance-detection algorithm for this class of CSPs, as well as a linear-sized set of constraints that breaks these symmetries statically.} } @TechReport{ it:2008-022, author = {Josef Cullhed and Stefan Engblom and Andreas Hellander}, title = {The {URDME} Manual version 1.0}, institution = it, department = tdb, year = 2008, number = {2008-022}, month = sep, abstract = {We have developed URDME, a general software for simulation of stochastic reaction-diffusion processes on unstructured meshes. This allows for a more flexible handling of complicated geometries and curved boundaries compared to simulations on structured, cartesian meshes. The underlying algorithm is the next subvolume method (NSM), extended to unstructured meshes by obtaining jump coefficients from the finite element formulation of the corresponding macroscopic equation. In this manual, we describe how to use the software together with COMSOL Multiphysics 3.4 and Matlab to set up simulations. We provide a detailed account of the code structure and of the available interfaces. This makes modifications and extensions of the code possible. 
We also give two detailed examples, in which we describe the process of simulating and visualizing two models from the systems biology literature in a step-by-step manner.} } @TechReport{ it:2008-021, author = {{\AA}sa Cajander and Elina Eriksson and Jan Gulliksen and Iordanis Kavathatzopoulos and Bengt Sandblad}, title = {Anv{\"a}ndbara IT-st{\"o}d - En utv{\"a}rdering av ett forskningsprojekt vid CSN, Centrala studiest{\"o}dsn{\"a}mnden}, institution = it, department = hci, year = 2008, number = {2008-021}, month = aug, note = {In Swedish}, abstract = {Utvecklingsr{\aa}det f{\"o}r den statliga sektorn har tagit ett initiativ f{\"o}r att st{\"o}dja myndigheternas arbete med att f{\"o}rb{\"a}ttra arbetsmilj{\"o}n och s{\"a}nka sjukfr{\aa}nvaron. D{\"a}rf{\"o}r startades programmet Satsa friskt. Programmet ger st{\"o}d i form av ekonomiskt bidrag och experthj{\"a}lp till olika projekt inom statliga myndigheter. Ett av Satsa Friskts insatsomr{\aa}den {\"a}r ~M{\"a}nniska ~ IT~. Inom detta omr{\aa}de har det sedan 2004 bedrivits ett antal olika projekt vid flera statliga verk. Vi har fr{\aa}n MDI (avdelningen f{\"o}r m{\"a}nniska-datorinteraktion, institutionen f{\"o}r informationsteknologi) vid Uppsala universitet varit mer omfattande inblandad i tre olika s{\aa}dana projekt: vid CSN, Migrationsverket och SMHI. Syftet och inneh{\aa}llet har varierat en del mellan de olika projekten, men ett huvudsakligt fokus har varit hur man kan se till att de framtida IT-st{\"o}dda arbetena inom myndigheterna blir effektivare och arbetsmilj{\"o}n b{\"a}ttre. Genom att b{\"a}ttre beakta anv{\"a}ndbarhets- och arbetsmilj{\"o}aspekter vid kravst{\"a}llande, utveckling och inf{\"o}rande av IT-st{\"o}d kan man st{\"o}dja en positiv utveckling av verksamheten som s{\aa}dan liksom av arbetsinneh{\aa}ll och arbetsmilj{\"o} f{\"o}r den enskilde individen. 
Resultatet kan d{\aa} bli effektivare verksamhet, b{\"a}ttre service till kunder och ett h{\"a}lsosammare och h{\aa}llbarare arbete f{\"o}r de anst{\"a}llda. Denna rapport beskriver det arbete som under {\aa}ren 2005-2007 genomf{\"o}rts i samverkan mellan CSN, Centrala studiest{\"o}dsn{\"a}mnden, och MDI, Uppsala universitet. F{\"o}rutom en kort beskrivning av inneh{\aa}llet i det arbete som gjorts inom projektet redovisar denna rapport en utv{\"a}rdering av resultatet, de l{\"a}rdomar som vi gjort fr{\aa}n projektet, de r{\aa}d vi kan ge till andra som vill ta del av dessa l{\"a}rdomar samt en f{\"o}rteckning av de rapporter av olika slag som producerats.} } @TechReport{ it:2008-020, author = {Stefan Engblom}, title = {Parallel in Time Simulation of Multiscale Stochastic Chemical Kinetics}, institution = it, department = tdb, year = 2008, number = {2008-020}, month = aug, note = {Extended abstract to appear in Proceedings of ICNAAM 2008}, abstract = {A version of the time-parallel algorithm parareal is analyzed and applied to stochastic models in chemical kinetics. A fast predictor at the macroscopic scale (evaluated in serial) is available in the form of the usual reaction rate equations. A stochastic simulation algorithm is used to obtain an exact realization of the process at the mesoscopic scale (in parallel). The underlying stochastic description is a jump process driven by the Poisson measure. A convergence result in this arguably difficult setting is established suggesting that a homogenization of the solution is advantageous. We devise a simple but highly general such technique. Three numerical experiments on models representative to the field of computational systems biology illustrate the method. For non-stiff problems, it is shown that the method is able to quickly converge even when stochastic effects are present. For stiff problems we are instead able to obtain fast convergence to a homogenized solution. 
Overall, the method builds an attractive bridge between on the one hand, macroscopic deterministic scales and, on the other hand, mesoscopic stochastic ones. This construction is clearly possible to apply also to stochastic models within other fields. } } @TechReport{ it:2008-019, author = {Ken Mattsson and Frank Ham and Gianluca Iaccarino}, title = {Stable Boundary Treatment for the Wave Equation on Second-Order Form}, institution = it, department = tdb, year = 2008, number = {2008-019}, month = jun, abstract = {A stable and accurate boundary treatment is derived for the second-order wave equation. The domain is discretized using narrow-diagonal summation by parts operators and the boundary conditions are imposed using a penalty method, leading to fully explicit time integration. This discretization yields a stable and efficient scheme. The analysis is verified by numerical simulations in one-dimension using high-order finite difference discretizations, and in three-dimensions using an unstructured finite volume discretization.} } @TechReport{ it:2008-018, author = {Pierre Flener and Xavier Lorca}, title = {A Complete Characterisation of the Classification Tree Problem}, institution = it, department = csd, year = 2008, number = {2008-018}, month = jun, abstract = {Finding a classification tree over a given set of elements that is compatible with a given family of classification trees over subsets of that set is a common problem in many application areas, such as the historical analysis of languages, the theory of relational databases, and phylogenetic supertree construction. We present a constraint programming approach to this problem. First, we introduce a natural and compact graph representation of a family of classification trees. 
Second, we provide a complete filtering algorithm for the classification tree problem, based on this normal form.} } @TechReport{ it:2008-017, author = {Henrik Johansson}, title = {Design and Implementation of a Dynamic and Adaptive Meta-Partitioner for Parallel {SAMR} Grid Hierarchies}, institution = it, department = tdb, year = 2008, number = {2008-017}, month = jun, abstract = {In this paper we present a pilot implementation of the Meta-Partitioner, a partitioning framework that automatically selects, configures, and invokes suitable partitioning algorithms for Structured Adaptive Mesh Refinement (SAMR) applications. Efficient use of SAMR on parallel computers requires that the dynamic grid hierarchy is repeatedly repartitioned and redistributed. The partitioning process needs to consider all factors that contribute to the run-time, i.e. computational load, communication volume, synchronization delays, and data movement. There is no partitioning algorithm that performs well for all possible grid hierarchies --- instead the algorithms must be selected dynamically during run-time. At each repartitioning, the Meta-Partitioner uses performance data from previously encountered application states to select the partitioning algorithm with the best predicted performance. Before the repartition, we determine a partitioning focus to direct the partitioning effort to the performance-inhibiting factor that currently has the largest impact on the execution time. The implementation uses component-based software engineering (CBSE) to allow for easy expansion and modification. 
Also, by employing CBSE it is easy to adapt existing SAMR engines for use with the Meta-Partitioner.} } @TechReport{ it:2008-016, author = {Parosh Aziz Abdulla and Pavel Krcal and Wang Yi}, title = {R-automata}, institution = it, department = docs, year = 2008, number = {2008-016}, month = jun, abstract = {We introduce \emph{R-automata} -- a model for analysis of systems with resources which are consumed in small parts but which can be replenished at once. An R-automaton is a finite state machine which operates on a finite number of unbounded counters (modeling the resources). The values of the counters can be incremented, reset to zero, or left unchanged along the transitions. We define the language accepted by an R-automaton relative to a natural number $D$ as the set of words allowing a run along which no counter value exceeds $D$. As the main result, we show decidability of the universality problem, i.e., the problem whether there is a number $D$ such that the corresponding language is universal. The decidability proof is based on a reformulation of the problem in the language of finite monoids and solving it using the factorization forest theorem. This approach extends the way in which the factorization forest theorem was used to solve the limitedness problem for distance automata in Simon, 1994. We also show decidability of the non-emptiness problem and the limitedness problem, i.e., whether there is a natural number $D$ such that the corresponding language is non-empty resp.\ all the accepted words can also be accepted with counter values smaller than $D$. 
Finally, we extend the decidability results to R-automata with B\"uchi acceptance conditions.} } @TechReport{ it:2008-015, author = {Parosh Aziz Abdulla and Ahmed Bouajjani and Jonathan Cederberg and Fr{\'e}d{\'e}ric Haziza and Ahmed Rezine}, title = {Monotonic Abstraction for Programs with Dynamic Memory Heaps}, institution = it, department = docs, year = 2008, number = {2008-015}, optmonth = {}, abstract = {We propose a new approach for automatic verification of programs with dynamic heap manipulation. The method is based on symbolic (backward) reachability analysis using upward-closed sets of heaps w.r.t. an appropriate preorder on graphs. These sets are represented by a finite set of minimal graph patterns corresponding to a set of bad configurations. We define an abstract semantics for the programs which is monotonic w.r.t. the preorder. Moreover, we prove that our analysis always terminates by showing that the preorder is a well-quasi ordering. Our results are presented for the case of programs with 1-next selector. We provide experimental results showing the effectiveness of our approach. } } @TechReport{ it:2008-014, author = {Olga Grinchtein and Bengt Jonsson}, title = {Inference of Event-Recording Automata using Timed Decision Trees}, institution = it, department = docs, year = 2008, number = {2008-014}, month = apr, abstract = {In \emph{regular inference}, the problem is to infer a regular language, typically represented by a deterministic finite automaton (DFA) from answers to a finite set of membership queries, each of which asks whether the language contains a certain word. There are many algorithms for learning DFAs, the most well-known being the $L^*$ algorithm due to Dana Angluin. However, there are almost no extensions of these algorithms to the setting of timed systems. We present an algorithm for inferring a model of a timed system using Angluin's setup. One of the most popular model for timed system is timed automata. 
Since timed automata can freely use an arbitrary number of clocks, we restrict our attention to systems that can be described by \emph{event-recording automata} (DERAs). In previous work, we have presented an algorithm for inferring a DERA in the form of a region graph. In this paper, we present a novel inference algorithm for DERAs, which avoids constructing a (usually prohibitively large) region graph. We must then develop techniques for inferring guards on transitions of a DERA. Our construction deviates from previous work on inference of DERAs in that it first constructs a so called timed decision tree from observations of system behavior, which is thereafter folded into an automaton.} } @TechReport{ it:2008-013, author = {Olga Grinchtein and Bengt Jonsson and Martin Leucker}, title = {Learning of Event-Recording Automata}, institution = it, department = docs, year = 2008, number = {2008-013}, month = apr, abstract = {In regular inference, a regular language is inferred from answers to a finite set of membership queries, each of which asks whether the language contains a certain word. One of the most well-known regular inference algorithms is the $L^*$ algorithm due to Dana Angluin. However, there are almost no extensions of these algorithms to the setting of timed systems. We extend Angluin's algorithm for on-line learning of regular languages to the setting of timed systems. Since timed automata can freely use an arbitrary number of clocks, we restrict our attention to systems that can be described by deterministic event-recording automata (DERAs). We present three algorithms, $TL_sg^*$, $TL_nsg^*$ and $TL_s^*$, for inference of DERAs. In $TL_sg^*$ and $TL_nsg^*$, we further restrict event-recording automata to be event-deterministic in the sense that each state has at most one outgoing transition per action; learning such an automaton becomes significantly more tractable. 
The algorithm $TL_{nsg}^*$ builds on $TL_{sg}^*$ by attempting to construct a smaller (in number of locations) automaton.
The analysis of well-posedness for the stochastic Burgers' equation
A process in the system is a finite-state automaton and a transition is performed jointly by a process and its parent and children processes. The method derives an over-approximation of the induced transition system, which allows the use of finite trees as symbolic representations of infinite sets of configurations. Compared to traditional methods for parameterized verification of systems with tree topologies, our method does not require the manipulation of tree transducers, hence its simplicity and efficiency. We have implemented a prototype which works well on several nontrivial tree-based protocols.} } @TechReport{ it:2008-009, author = {Peter Naucl{\'e}r and Torsten S{\"o}derstr{\"o}m}, title = {Linear and Nonlinear Regression with Application to Unbalance Estimation}, institution = it, department = syscon, year = 2008, number = {2008-009}, month = mar, abstract = {This paper considers estimation of parameters that enters nonlinearly in a regression model. The problem formulation is closely connected to unbal- ance estimation of rotating machinery. The parameter estimation problem can after approximation be formulated as a linear estimation procedure, while neglecting the effects of the disturbing term. Two such estimators are derived. In addition, a third approach that handles the uncertainty in a statistically sound way is presented. The three methods are compared and analyzed with respect to their statistical accuracy. Using the example of unbalance estimation of a separator, the nonlinear approach is shown to outperform the other two.} } @TechReport{ it:2008-008, author = {Mei Hong and Torsten S{\"o}derstr{\"o}m}, title = {Relations between Bias-Eliminating Least Squares, the {F}risch Scheme and Extended Compensated Least Squares Methods for Identifying Errors-in-Variables Systems}, institution = it, department = syscon, year = 2008, number = {2008-008}, month = mar, abstract = {There are many methods for identifying errors-in-variables systems. 
Among them Bias-Eliminating Least Squares (BELS), the Frisch scheme and Extended Compensated Least Squares (ECLS) methods are attractive approaches because of their simplicity and good estimation accuracy. These three methods are all based on a bias-compensated least-squares (BCLS) principle. In this report, the relations between them are considered. In particular, the nonlinear equations utilized in these three methods are proved to be equivalent under different noise conditions. It is shown that BELS, Frisch and ECLS methods have the same asymptotic estimation accuracy providing the same extended vector is used.} } @TechReport{ it:2008-007, author = {Torbj{\"o}rn Wigren and Linda Brus}, title = {{MATLAB} Software for Recursive Identification and Scaling Using a Structured Nonlinear Black-box Model - Revision 4}, institution = it, department = syscon, year = 2008, number = {2008-007}, month = mar, note = {Revised version of nr 2007-013. The software package can be downloaded from \url{http://www.it.uu.se/research/publications/reports/2008-007/NRISoftwareRev4.zip}. \textbf{Note} that the software package was updated on 2010-03-16.}, abstract = {This reports is intended as a users manual for a package of MATLAB scripts and functions, developed for recursive prediction error identification of nonlinear state space systems and nonlinear static systems. The core of the package is an implementation of an output error identification and scaling algorithm. The algorithm is based on a continuous time, structured black box state space model of a nonlinear system. An RPEM algorithm for recursive identification of nonlinear static systems, that re-uses the parameterization of the nonlinear ODE model, is also included in the software package. In this version of the software an initialization algorithm based on Kalman filter theory has been added to the package. 
The purpose of the initialization algorithm is to find initial parameters for the prediction error algorithm, and thus reduce the risk of convergence to local minima for the nonlinear identification problem. The software can only be run off-line, i.e. no true real time operation is possible. The algorithms are however implemented so that true on-line operation can be obtained by extraction of the main algorithmic loop. The user must then provide the real time environment. The software package contains scripts and functions that allow the user to either input live measurements or to generate test data by simulation. The scripts and functions for the setup and execution of the identification algorithms are somewhat more general than what is described in the references. There is e.g. support for automatic re-initiation of the algorithms using the parameters obtained at the end of a previous identification run. This allows for multiple runs through a set of data, something that is useful for data sets that are too short to allow convergence in a single run. The re-initiation step also allows the user to modify the degrees of the polynomial model structure and to specify terms that are to be excluded from the model. This makes it possible to iteratively re-fine the estimated model using multiple runs. The functionality for display of results include scripts for plotting of data, parameters, prediction errors, eigenvalues and the condition number of the Hessian. The estimated model obtained at the end of a run can be simulated and the model output plotted, alone or together with the data used for identification. Model validation is supported by two methods apart from the display functionality. First, a calculation of the RPEM loss function can be performed, using parameters obtained at the end of an identification run. 
Secondly, the accuracy as a function of the output signal amplitude can be assessed.} } @TechReport{ it:2008-006, author = {Linda Brus}, title = {{MATLAB} Software for Feedforward Optimal Control of Systems with Flow Varying Time Delays - Revision 2}, institution = it, department = syscon, year = 2008, number = {2008-006}, month = mar, note = {The software package can be downloaded from \url{http://www.it.uu.se/research/publications/reports/2008-006/NOCSoftwareV2.zip}} , abstract = {This report describes a software package for optimal control of a nonlinear system with flow variant time delay. The software was developed for control of a solar plant in Seville, Spain, and is tailored to this application. The optimization is performed using an algorithm with gradient based minimum search. } } @TechReport{ it:2008-005, author = {Stefan Engblom}, title = {A Discrete Spectral Method for the Chemical Master Equation}, institution = it, department = tdb, year = 2008, number = {2008-005}, month = feb, note = {Supersedes report nr 2006-036. Updated Feb 29, 2008.}, abstract = {As an equivalent formulation of the Markov-assumption of stochastic processes, the master equation of chemical reactions is an accurate description of general systems in chemistry. For $D$ reacting species this is a differential-difference equation in $D$ dimensions, exactly soluble for very simple systems only. We present and analyze a novel solution strategy based upon a Galerkin spectral method with an inherent natural adaptivity and a very favorable choice of basis functions. The method is demonstrated by the numerical solution of two model problems followed by two more realistic systems taken from molecular biology. 
It is shown that the method remains effective and accurate, providing a viable alternative to other solution methods when the dimensionality is not too high.} } @TechReport{ it:2008-004, author = {Arne Andersson and Jim Wilenius}, title = {A New Analysis of Combinatorial vs Simultaneous Auctions: Revenue and Efficiency}, institution = it, department = csd, year = 2008, number = {2008-004}, month = feb, abstract = {We address the fundamental issue of revenue and efficiency in the combinatorial and simultaneous auction using a novel approach. Specifically, upper and lower bounds are constructed for the first-price sealed-bid setting of these two auctions. The question of revenue is important yet very few results can be found in the literature. Only for very small instances with 2 items have comparisons been made. Krishna et. al. find that allowing combinatorial bids result in lower revenue compared to a second price simultaneous auction. We formulate a lower bound on the first-price combinatorial auction and an upper bound on the first-price simultaneous auction for larger problems with several items and many bidders, in a model where bidders have synergies from winning a specific set of items. We show that the combinatorial auction is revenue superior to the simultaneous auction for a specific instance in pure symmetric equilibrium and give two generalized upper bounds on revenue for the simultaneous auction.} } @TechReport{ it:2008-003, author = {Iordanis Kavathatzopoulos}, title = {Ett f{\"o}rb{\"a}ttrat verktyg f{\"o}r m{\"a}tning av anv{\"a}ndbarhet, stress och nytta: Andra f{\"o}rs{\"o}ket inom {CSN}}, institution = it, department = hci, year = 2008, number = {2008-003}, month = jan, note = {In Swedish.}, abstract = {Syftet med detta arbete {\"a}r att utveckla ett index f{\"o}r att beskriva anv{\"a}ndbarheten hos ITverktyg och hur anv{\"a}ndbarheten p{\aa}verkar effektiviteten. Enk{\"a}ten pr{\"o}vades inom CSN f{\"o}rst p{\aa} STIS2000. 
Resultaten ledde till en f{\"o}rb{\"a}ttrad version av enk{\"a}ten som pr{\"o}vades h{\"o}sten 2006 p{\aa} E-posthandl{\"a}ggningen. 427 personer deltog i unders{\"o}kningen. Sammanh{\aa}llningen av enk{\"a}tens delar och fr{\aa}gor samt tillf{\"o}rlitligheten har blivit b{\"a}ttre vilket g{\"o}r det l{\"a}ttare att utveckla ett anv{\"a}ndbarhetsindex. Svaren visar ocks{\aa} att deltagarna {\"a}r i stort neutrala n{\"a}r det g{\"a}ller anv{\"a}ndbarheten av E-posthandl{\"a}ggningen inom CSN.} } @TechReport{ it:2008-002, author = {Owe Axelsson and Janos Karatson}, title = {Equivalent Operator Preconditioning for Linear Elliptic Problems}, institution = it, department = tdb, year = 2008, number = {2008-002}, month = jan, note = {A preliminary version of the same article is published as Preprint 2007-04, ELTE Dept. Appl. Anal. Comp. Math., \url{http://www.cs.elte.hu/applanal/preprints}}, abstract = {The numerical solution of linear elliptic partial differential equations most often involves a finite element or finite difference discretization. To preserve sparsity, the arising system is normally solved using an iterative solution method, commonly a preconditioned conjugate gradient method. Preconditioning is a crucial part of such a solution process. It is desirable that the total computational cost will be optimal, i.e. proportional to the degrees of freedom of the approximation used, which also includes mesh independent convergence of the iteration. This paper surveys the equivalent operator approach, which has proven to provide an efficient general framework to construct such preconditioners. Hereby one first approximates the given differential operator by some simpler differential operator, and then one chooses as preconditioner the discretization of this operator for the same mesh. 
In this survey we give a uniform presentation of this approach, including theoretical foundation and several practically important applications.} } @TechReport{ it:2008-001, author = {Bj{\"o}rn Holmberg and Bo Nordin and Ewert Bengtsson and H{\aa}kan Lanshammar}, title = {On the Plausibility of Using Skin Texture as Virtual Markers in the Human Motion Analysis Context, a {2D} Study}, institution = it, department = syscon, year = 2008, number = {2008-001}, month = jan, abstract = {For the first time it is shown that skin texture can be used as a means to match human limb surfaces between different image frames. The results are limited to motion in two dimensions. It is shown that images with a resolution that can be produced in today existing video camera hardware are usable. This is very encouraging for the future of clinical marker free human motion analysis applications. The next step will be to investigate if these results can be extended to three dimensions using a stereo camera setup. The method uses histogram information as a means to match small image patches to one another. The histogram matching is performed using a Mutual Information criterion as the cost function and Simulated Annealing as the optimization algorithm. } } @TechReport{ it:2007-035, author = {Mayank Saksena and Oskar Wibling and Bengt Jonsson}, title = {Graph Grammar Modeling and Verification of Ad Hoc Routing Protocols}, institution = it, department = docs, year = 2007, number = {2007-035}, month = dec, note = {Updated March 2008. Extended abstract to appear in proc. 14th Int. Conf. on Tools and Algorithms for the Construction and Analysis of Systems (TACAS 2008).}, abstract = {We present a technique for modeling and automatic verification of network protocols, based on graph transformation. It is suitable for protocols with a potentially unbounded number of nodes, in which the structure and topology of the network is a central aspect, such as routing protocols for ad hoc networks. 
Safety properties are specified as a set of undesirable global configurations. We verify that there is no undesirable configuration which is reachable from an initial configuration, by means of symbolic backward reachability analysis. In general, the reachability problem is undecidable. We implement the technique in a graph grammar analysis tool, and automatically verify several interesting non-trivial examples. Notably, we prove loop freedom for the DYMO ad hoc routing protocol. DYMO is currently on the IETF standards track, to potentially become an Internet standard.} } @TechReport{ it:2007-034, author = {Stefan Seipel and Lars Winkler Pettersson}, title = {{P}ixel{A}ctive{S}urface: A Tabletop Rear-Projection Display for Collaboration with Pixel-Accurate Interaction}, institution = it, department = hci, year = 2007, number = {2007-034}, month = dec, note = {Extended abstract appeared in ACM Conference on Supporting Group Work (GROUP'07).}, abstract = {The use of table-top displays as working environment provides a very natural way to present and interact with information. In particular when several users intend an unhindered face-to-face dialogue, horizontal displays have been introduced as efficient tools for collaborative work in shared physical spaces. Many of the hardware systems presented so far are composed of several independent and technically often clumsy components for the display and for tracking of users' head positions or gestures, respectively. This hampers, in practice, easy operation and a more widespread use of table-top displays in real working environments. In this paper, we present our technical solution for a fully integrated, small sized table-top visualization environment that provides head position tracking for two collaborating users, stereoscopic projection and high resolution multi-point screen interaction. Our solution is intended as a self-contained out-of-the-box system that is affordable and easy to use. 
The real novelty with our technique presented in this paper is, however, that we succeeded in combining high resolution and multi-point pen-based interaction technology based on optical pattern recognition with a rear-projection screen, which opens up for a wider field of applications not only limited to tabletop display environments.} } @TechReport{ it:2007-033, author = {Owe Axelsson and Radim Blaheta and Maya Neytcheva}, title = {A Black-Box Generalized Conjugate Gradient Minimum Residual Method Based on Variable Preconditioners and Local Element Approximations}, institution = it, department = tdb, year = 2007, number = {2007-033}, month = dec, abstract = {In order to control the accuracy of a preconditioner for an outer iterative process one often involves variable preconditioners. The variability may for instance be due to the use of inner iterations in the construction of the preconditioner. Both the outer and inner iterations may be based on some conjugate gradient type of method, e.g. generalized minimum residual methods. A background for such methods, including results about their computational complexity and rate of convergence, is given. It is then applied for a variable preconditioner arising for matrices partitioned in two-by-two block form. The matrices can be unsymmetric and also indefinite. The aim is to provide a black--box solver, applicable for all ranges of problem parameters such as coefficient jumps and anisotropy. When applying this approach for elliptic boundary value problems, in order to achieve the latter aim, it turns out to be efficient to use local element approximations of arising block matrices as preconditioners for the inner iterations. 
It is illustrated by numerical examples how the convergence rate of the inner-outer iteration method approaches that for the more expensive fixed preconditioner when the accuracies of the inner iterations increase.} } @TechReport{ it:2007-032, author = {Pierre Flener and Justin Pearson and Meinolf Sellmann and Van Hentenryck, Pascal and Magnus {\AA}gren}, title = {Structural Symmetry Breaking for Constraint Satisfaction Problems}, institution = it, department = csd, year = 2007, number = {2007-032}, month = nov, abstract = {In recent years, symmetry breaking for constraint satisfaction problems (CSPs) has attracted considerable attention. Various general schemes have been proposed to eliminate symmetries. In general, these schemes may take exponential space or time to eliminate all the symmetries. We identify several classes of CSPs that encompass many practical problems and for which symmetry breaking for various forms of value or variable interchangeability is tractable using dedicated search procedures. We also show the limits of efficient symmetry breaking for such dominance-detection schemes by proving intractability results for some classes of CSPs. } } @TechReport{ it:2007-031, author = {Niklas Hardenborg and Iordanis Kavathatzopoulos and Bengt Sandblad}, title = {Performing the Vision Seminar Process}, institution = it, department = hci, year = 2007, number = {2007-031}, month = nov, abstract = {The Vision Seminar Process is a process for developing an efficient, sustainable, IT-supported future work and a tool for proving a solid basis for the development of IT systems. The process provides a framework where practitioners and designers cooperate in the design of both sustainable work and usable IT systems. A practical approach is provided to facilitate for practitioners' reflective in-depth analysis of their work practices as well as to question and discuss their entire work situation and organization. 
This report gives a practical and basic description of the process procedure based on experiences from several projects where the process has been carried out in different contexts and organizations.} } @TechReport{ it:2007-030, author = {Parosh Abdulla and Noomene Ben Henda and Giorgio Delzanno and Ahmed Rezine}, title = {Handling Parameterized Systems with Non-Atomic Global Conditions}, institution = it, department = docs, year = 2007, number = {2007-030}, month = nov, note = {Updated 20 Mar 2008. To appear in the Proc. of VMCAI 2008}, abstract = {We consider verification of safety properties for parameterized systems with linear topologies. A process in the system is an extended automaton, where the transitions are guarded by both local and global conditions. The global conditions are non-atomic, i.e., a process allows arbitrary interleavings with other transitions while checking the states of all (or some) of the other processes. We translate the problem into model checking of infinite transition systems where each configuration is a labeled finite graph. We derive an over-approximation of the induced transition system, which leads to a symbolic scheme for analyzing safety properties. We have implemented a prototype and run it on several nontrivial case studies, namely non-atomic versions of Burn's protocol, Dijkstra's protocol, the Bakery algorithm, Lamport's distributed mutual exclusion protocol, and a two-phase commit protocol used for handling transactions in distributed systems. 
As far as we know, these protocols have not previously been verified in a fully automated framework.} } @TechReport{ it:2007-029, author = {Jing Gong and Jan Nordstr{\"o}m and van der Weide, Edwin}, title = {A Hybrid Method for the Unsteady Compressible {N}avier-{S}tokes Equations}, institution = it, department = tdb, year = 2007, number = {2007-029}, month = oct, abstract = {A hybrid method composed of finite difference-finite difference and finite difference-finite volume schemes for the time-dependent Navier-Stokes equations has been developed. A detailed analysis of the stability of the proposed algorithms, paying special attention to the stability of the interfaces between the subdomains is performed. We prove that the interface coupling is stable and conservative. This techniques makes it possible to combine the efficiency of the finite difference method and the flexibility of the finite volume schemes. We exemplify the procedure by using finite difference methods. The computational results corroborate the theoretical analysis.} } @TechReport{ it:2007-028, author = {Paul Sj{\"o}berg}, title = {{PDE} and {M}onte {C}arlo Approaches to Solving the Master Equation Applied to Gene Regulation}, institution = it, department = tdb, year = 2007, number = {2007-028}, month = oct, abstract = {The \textit{Fokker-Planck equation} (FPE) approximation is applied for a subspace of the state space of the \textit{chemical master equation} (CME). The CME-FPE-hybrid method exploits the lower cost of the FPE approximation compared to the full CME. 
A fourth order finite difference approximation of the FPE part of the hybrid is described and demonstrated on a biologically relevant model in five dimensions.} } @TechReport{ it:2007-027, author = {Andreas Hellander}, title = {Efficient Computation of Transient Solutions of the Chemical Master Equation Based on Uniformization and Quasi-{M}onte {C}arlo}, institution = it, department = tdb, year = 2007, number = {2007-027}, month = oct, abstract = {A Quasi-Monte Carlo method for the simulation of discrete time Markov chains is applied to the simulation of biochemical reaction networks. The continuous process is formulated as a discrete chain subordinate to a Poisson process using the method of uniformization. It is shown that a substantial reduction of the number of trajectories that is required for an accurate estimation of the probability density functions (PDF) can be achieved with this technique. The method is applied to the simulation of two model problems. Although the technique employed here does not address the typical stiffness of such systems, it is useful when computing the PDF by replication and the method can be used in conjuncture with hybrid methods that reduce the stiffness.} } @TechReport{ it:2007-026, author = {Linda Brus}, title = {{MATLAB} Software for Feedforward Optimal Control of Systems with Flow Varying Time Delays}, institution = it, department = syscon, year = 2007, number = {2007-026}, month = sep, note = {A revised version can be found as technical report nr 2008-006. The software package can be downloaded from \url{http://www.it.uu.se/research/publications/reports/2007-026/NOCSoftware.zip}} , abstract = {This report describes a software package for optimal control of a nonlinear system with flow variant time delay. The software was developed for control of a solar plant in Seville, Spain, and is tailored to this application. 
The optimization is performed using an algorithm with gradient based minimum search.} } @TechReport{ it:2007-025, author = {Peter Naucl{\'e}r and Torsten S{\"o}derstr{\"o}m}, title = {Polynomial Feedforward Design Techniques for a Mechanical Wave Diode System}, institution = it, department = syscon, year = 2007, number = {2007-025}, month = sep, abstract = {This paper considers feedforward control of extensional waves in a bar. The system is designed to have properties analogous to those of an electrical diode and is therefore referred to as a mechanical wave diode. We present three different feedforward control strategies. Two of them rely on an `ideal' design which is derived in the noise-free case, whereas the third is based on Wiener filtering theory. The control strategies are compared and evaluated for different signal models and in the presence of measurement noise. We show that the performance of the device is improved by using the (optimal) Wiener feedforward filter. } } @TechReport{ it:2007-024, author = {John H{\aa}kansson and Paul Pettersson}, title = {Partial Order Reduction for Verification of Real-Time Components}, institution = it, department = docs, year = 2007, number = {2007-024}, month = sep, note = {A shorter version will appear in the Proceedings of FORMATS 2007, LNCS 4762, pp 211-226.}, abstract = {We describe a partial order reduction technique for a real-time component model. Components are described as timed automata with data ports, which can be composed in static structures of unidirectional control and data flow. Compositions can be encapsulated as components and used in other compositions to form hierarchical models. The proposed partial order reduction technique uses a local time semantics for timed automata, in which time may progress independently in parallel automata which are resynchronized when needed. 
To increase the number of independent transitions and to reduce the problem of re-synchronizing parallel automata we propose, and show how, to use information derived from the composition structure of an analyzed model. Based on these ideas, we present a reachability analysis algorithm that uses an ample set construction to select which symbolic transitions to explore. The algorithm has been implemented as a prototype extension of the real-time model-checker UPPAAL. We report from experiments with the tool that indicate that the technique can achieve substantial reduction in the time and memory needed to analyze a real-time system described in the studied component model. } } @TechReport{ it:2007-023, author = {Lars Ferm and Per L{\"o}tstedt}, title = {Adaptive Solution of the Master Equation in Low Dimensions}, institution = it, department = tdb, year = 2007, number = {2007-023}, month = sep, abstract = {The master equation satisfied by a probability density function is solved on a grid with a cell size $h>1.$ A modified master equation is derived for the time development of the average of the density in the larger cells. The accuracy of the approximation is studied and the total probability is conserved. Based on an estimate of the discretization error, the cell size is dynamically adapted to the solution. The method is suitable for a few space dimensions and is tested on a model for the migration of people. 
Substantial savings in memory requirements and CPU times are reported in numerical experiments.} } @TechReport{ it:2007-022, author = {Mei Hong and Torsten S{\"o}derstr{\"o}m and Johan Schoukens and Rik Pintelon}, title = {Accuracy Analysis of Time Domain Maximum Likelihood Method and Sample Maximum Likelihood Method for Errors-in-Variables Identification}, institution = it, department = syscon, year = 2007, number = {2007-022}, month = sep, abstract = {The time domain maximum likelihood (TML) method and the sample maximum likelihood (SML) method are two approaches for identifying errors-in-variables models. Both methods may give the optimal estimation accuracy (achieve Cram\'er-Rao lower bound) but in different senses. In the TML method, an important assumption is that the noise-free input signal is modeled as a stationary process with rational spectrum. For SML, the noise-free input needs to be periodic. It is interesting to know which of these assumptions contain more information to boost the estimation performance. In this paper, the estimation accuracy of the two methods is analyzed statistically. Numerical comparisons between the two estimates are also done under different signal-to-noise ratios (SNRs). The results suggest that TML and SML have similar estimation accuracy at moderate or high SNR.} } @TechReport{ it:2007-021, author = {Mei Hong and Torsten S{\"o}derstr{\"o}m and Umberto Soverini and Roberto Diversi}, title = {Comparison of Three {F}risch Methods for Errors-in-Variables Identification}, institution = it, department = syscon, year = 2007, number = {2007-021}, month = aug, abstract = {The errors--in--variables framework concerns static or dynamic systems whose input and output variables are affected by additive noise. Several estimation methods have been proposed for identifying dynamic errors--in--variables models. One of the more promising approaches is the so--called Frisch scheme. 
This report describes three different estimation criteria within the Frisch context and compares their estimation accuracy on the basis of the asymptotic covariance matrices of the estimates. Some final numerical examples support the theoretical results and analyze the behaviour of the methods in case of finite number of data.} } @TechReport{ it:2007-020, author = {J. Nordstr{\"o}m and F. Ham and M. Shoeybi and E. van der Weide and M. Sv{\"a}rd and K. Mattsson and G. Iaccarino and J. Gong}, title = {A Hybrid Method for Unsteady Fluid Flow}, institution = it, department = tdb, year = 2007, number = {2007-020}, month = aug, abstract = {We show how a stable and accurate hybrid procedure for fluid flow can be constructed. Two separate solvers, one using high order finite difference methods and another using the node-centered unstructured finite volume method are coupled in a truly stable way. The two flow solvers run independently and receive and send information from each other by using a third coupling code. Exact solutions to the Euler equations are used to verify the accuracy and stability of the new computational procedure. We also demonstrate the capability of the new procedure in a calculation of the flow in and around a model of a coral. } } @TechReport{ it:2007-019, author = {Sofia Eriksson and Magnus Sv{\"a}rd and Jan Nordstr{\"o}m}, title = {Simulations of Ground Effects on Wake Vortices at Runways}, institution = it, department = tdb, year = 2007, number = {2007-019}, month = jun, abstract = {In this paper the interaction between two counter-rotating vortices is examined, and the performance of a newly developed finite difference code is discussed. The Reynolds numbers considered are low to medium, and the flow is compressible. Most of the computations are performed in a two dimensional domain, with different grid sizes, Reynolds number and order of accuracy of the scheme. 
Finally, a three dimensional computation is made in order to examine the relevance of the two dimensional model.} } @TechReport{ it:2007-018, author = {Robin Adams and Sally Fincher and Arnold Pears and Jonas Boustedt and J{\"u}rgen B{\"o}rstler and Peter Dalenius and Gunilla Eken and Tim Heyer and Andreas Jakobsson and Vanja Lindberg and Bengt Molin and Jan Erik Mostr{\"o}m and Mattias Wiggberg}, title = {What is the Word for ``Engineering'' in {S}wedish: Swedish Students' Conceptions of their Discipline}, institution = it, year = 2007, number = {2007-018}, month = jun, abstract = {Engineering education in Sweden -- as in the rest of the world -- is experiencing a decline in student interest. There are concerns about the ways in which students think about engineering education, why they join an academic programme in engineering, and why they persist in their studies. In this context the aims of the Nationellt {\"a}mnesdidaktiskt Centrum f{\"o}r Teknikutbildning i Studenternas Sammanhang project (CeTUSS) are to investigate the student experience and to identify and support a continuing network of interested researchers, as well as in building capacity for disciplinary pedagogic investigation. The Stepping Stones project brings together these interests in a multi-researcher, multi-institutional study that investigates how students and academic staff perceive engineering in Sweden and in Swedish education. The first results of that project are reported here. As this study is situated uniquely in Swedish education, it allows for exploration of ``a Swedish perspective'' on conceptions of engineering. The Stepping Stones project was based on a model of research capacity-building previously instantiated in the USA and Australia (Fincher \& Tenenberg, 2006). 
} } @TechReport{ it:2007-017, author = {Olof Rensfelt and Lars-{\AA}ke Larzon}, title = {A Bandwidth Study of a {DHT} in a Heterogeneous Environment}, institution = it, department = docs, year = 2007, number = {2007-017}, month = may, abstract = {We present a NS-2 implementation of a distributed hash table (DHT) modeled after Bamboo. NS-2 is used to evaluate the bandwidth costs involved in using a DHT in heterogeneous environments. Networks are modeled as mixed networks of desktop machines and 3G cellphones. We also document the modifications of NS-2 that were needed to simulate churn in large networks.} } @TechReport{ it:2007-016, author = {Erik Nordstr{\"o}m and Per Gunningberg and Christian Rohner and Oskar Wibling}, title = {A Cross-Environment Study of Routing Protocols for Wireless Multi-hop Networks}, institution = it, department = docs, year = 2007, number = {2007-016}, month = apr, abstract = {We study ad hoc routing protocol mechanisms that impact the performance during and after periods of connectivity change. Our evaluation procedure is facilitated by using a structured and tool-supported approach, combining real world experiments with simulation and emulation. This method enables us to find performance-critical time regions in our traces. Our analysis shows that performance is largely determined by how accurately a protocol senses connectivity in these regions. Inaccurate sensing can seriously affect the performance of the protocol, even after the critical regions. We identify three significant problems with sensing that we call \emph{Self-interference}, \emph{TCP backlashing} and \emph{Link cache poisoning}. 
We discuss their effect on the design of sensing mechanisms in routing protocols and suggest how the protocols can be made more robust.} } @TechReport{ it:2007-015, author = {Torbj{\"o}rn Wigren}, title = {{MATLAB} Software for Recursive Identification of Systems With Output Quantization - Revision 1}, institution = it, department = syscon, year = 2007, number = {2007-015}, month = apr, note = {The software package can be downloaded from \url{http://www.it.uu.se/research/publications/reports/2007-015/QRISRev1.zip}} , abstract = {This report is intended as a user's manual for a package of MATLAB scripts and functions, developed for recursive identification of discrete time nonlinear Wiener systems, where the static output nonlinearity is a known arbitrary quantization function, not necessarily monotone. Wiener systems consist of linear dynamics in cascade with a static nonlinearity. Hence the systems treated by the software package can also be described as discrete time linear systems, where the output is measured after a known quantization function. The identification algorithms thus identify the linear dynamics of the Wiener system. The core of the package is an implementation of 5 recursive SISO output error identification algorithms. The measurement noise is assumed to affect the system after quantization. The identified linear dynamic part of the system is allowed to be of FIR or IIR type. A key feature of the identification algorithms is the use of a smooth approximation of the quantizer, for derivation of an approximation of the gradient of the algorithm. This is necessary since the derivative of the quantizer consists of a set of pulses, in the quantization steps. Using such an approximation 2 recursive stochastic gradient algorithms and 3 recursive Gauss-Newton algorithms are obtained. The algorithms differ by the choice of gradient approximation. 
It should be noted that the stochastic gradient algorithms are primarily suited for (high order) FIR systems -- they converge very slowly for IIR systems due to the large eigenvalue spread of the Hessian that typically results for IIR systems. Arbitrarily colored additive measurement noise is handled by all algorithms. The software can only be run off-line, i.e. no true real time operation is possible. The algorithms are however implemented so that true on-line operation can be obtained by extraction of the main algorithmic loops. The user must then provide the real time environment. The software package contains scripts and functions that allow the user to either input live measurements or to generate test data by simulation. The functionality for display of results includes scripts for plotting of data, parameters and prediction errors. Model validation is supported by several methods apart from the display functionality. First, calculation of the RPEM loss function can be performed, using parameters obtained at the end of an identification run. Pole-zero plots can be used to investigate possible overparameterization in the linear dynamic part of the Wiener model. Finally, the static accuracy as a function of the output signal amplitude can be assessed with mean residual analysis.} } @TechReport{ it:2007-014, author = {Parosh Aziz Abdulla and Giorgio Delzanno and Ahmed Rezine}, title = {Parameterized Verification of Infinite-state Processes with Global Conditions}, institution = it, department = docs, year = 2007, number = {2007-014}, month = apr, note = {A short version of this paper will appear in the proceedings of \emph{Computer Aided Verification} (CAV) 2007.}, abstract = {We present a simple and effective approximated backward reachability algorithm for parameterized systems with existentially and universally quantified global conditions. The individual processes operate on unbounded local variables ranging over the natural numbers. 
In addition, processes may communicate via broadcast, rendez-vous and shared variables. We apply the algorithm to verify mutual exclusion for complex protocols such as Lamport's bakery algorithm both with and without atomicity conditions, a distributed version of the bakery algorithm, and Ricart-Agrawala's distributed mutual exclusion algorithm.} } @TechReport{ it:2007-013, author = {Torbj{\"o}rn Wigren and Linda Brus}, title = {{MATLAB} Software for Recursive Identification and Scaling Using a Structured Nonlinear Black-box Model - Revision 3}, institution = it, department = syscon, year = 2007, number = {2007-013}, month = apr, note = {Revised version of nr 2005-022. The software package can be downloaded from \url{http://www.it.uu.se/research/publications/reports/2007-013/NRISoftwareRev3.zip}} , abstract = {This report is intended as a user's manual for a package of MATLAB scripts and functions, developed for recursive prediction error identification of nonlinear state space systems and nonlinear static systems. The core of the package is an implementation of an output error identification and scaling algorithm. The algorithm is based on a continuous time, structured black box state space model of a nonlinear system. An RPEM algorithm for recursive identification of nonlinear static systems, that re-uses the parameterization of the nonlinear ODE model, is also included in the software package. In this version of the software an initialization algorithm based on Kalman filter theory has been added to the package. The purpose of the initialization algorithm is to find initial parameters for the prediction error algorithm, and thus reduce the risk of convergence to local minima for the nonlinear identification problem. The software can only be run off-line, i.e. no true real time operation is possible. The algorithms are however implemented so that true on-line operation can be obtained by extraction of the main algorithmic loop. 
The user must then provide the real time environment. The software package contains scripts and functions that allow the user to either input live measurements or to generate test data by simulation. The scripts and functions for the setup and execution of the identification algorithms are somewhat more general than what is described in the references. There is e.g. support for automatic re-initiation of the algorithms using the parameters obtained at the end of a previous identification run. This allows for multiple runs through a set of data, something that is useful for data sets that are too short to allow convergence in a single run. The re-initiation step also allows the user to modify the degrees of the polynomial model structure and to specify terms that are to be excluded from the model. This makes it possible to iteratively refine the estimated model using multiple runs. The functionality for display of results includes scripts for plotting of data, parameters, prediction errors, eigenvalues and the condition number of the Hessian. The estimated model obtained at the end of a run can be simulated and the model output plotted, alone or together with the data used for identification. Model validation is supported by two methods apart from the display functionality. First, a calculation of the RPEM loss function can be performed, using parameters obtained at the end of an identification run. Secondly, the accuracy as a function of the output signal amplitude can be assessed.} } @TechReport{ it:2007-012, author = {Alexander Churilov and Alexander Medvedev and Alexander Shepeljavyi}, title = {Mathematical Model of Non-Basal Testosterone Regulation in the Male by Pulse Modulated Feedback}, institution = it, department = syscon, year = 2007, number = {2007-012}, month = apr, abstract = {A parsimonious mathematical model of pulse modulated regulation of non-basal testosterone secretion in the male is developed. 
The model is of third differential order, reflecting the three most significant hormones in the regulation loop, but is yet shown to be capable of sustaining periodic solutions with one or two pulses of gonadotropin-releasing hormone (GnRH) on each period. Lack of stable periodic solutions is otherwise a main shortcoming of existing low-order hormone regulation models. Existence and stability of periodic solutions are studied. The periodic mode with two GnRH pulses on the least period has not been described in medical literature but is found to explain experimental data well. } } @TechReport{ it:2007-011, author = {Lars Ferm and Per L{\"o}tstedt and Andreas Hellander}, title = {A Hierarchy of Approximations of the Master Equation Scaled by a Size Parameter}, institution = it, department = tdb, year = 2007, number = {2007-011}, month = apr, abstract = {Solutions of the master equation are approximated using a hierarchy of models based on the solution of ordinary differential equations: the macroscopic equations, the linear noise approximation and the moment equations. The advantage with the approximations is that the computational work with deterministic algorithms grows as a polynomial in the number of species instead of an exponential growth with conventional methods for the master equation. The relation between the approximations is investigated theoretically and in numerical examples. The solutions converge to the macroscopic equations when a parameter measuring the size of the system grows. A computational criterion is suggested for estimating the accuracy of the approximations. 
The numerical examples are models for the migration of people, in population dynamics and in molecular biology.} } @TechReport{ it:2007-010, author = {Torbj{\"o}rn Wigren}, title = {{MATLAB} Software for Recursive Identification of Wiener Systems - Revision 2}, institution = it, department = syscon, year = 2007, number = {2007-010}, month = mar, note = {The software package was updated to handle FIR systems, revision 3, April 2007. The updated software package and manual can be downloaded from \url{http://www.it.uu.se/research/publications/reports/2007-010/WRIS.zip}} , abstract = {This report is intended as a user's manual for a package of MATLAB scripts and functions, developed for recursive prediction error identification of discrete time nonlinear Wiener systems and nonlinear static systems. Wiener systems consist of linear dynamics in cascade with a static nonlinearity. The core of the package is an implementation of 9 recursive SISO output error identification algorithms. Three main cases are treated. The first set of 5 algorithms identify the IIR linear dynamics in cases where the static nonlinearity is known. It is stressed that the nonlinearity is allowed to be non-invertible. The second set of 2 algorithms simultaneously identifies the linear dynamics and the static non-linearity. The nonlinearity is parameterized as a piecewise linear or a piecewise quadratic nonlinear function. The last set of two algorithms exploits the above parameterization of the static nonlinearity for estimation of static nonlinear systems. Arbitrarily colored additive measurement noise is handled by all algorithms. The software can only be run off-line, i.e. no true real time operation is possible. The algorithms are however implemented so that true on-line operation can be obtained by extraction of the main algorithmic loops. The user must then provide the real time environment. 
The software package contains scripts and functions that allow the user to either input live measurements or to generate test data by simulation. The functionality for display of results includes scripts for plotting of data, parameters and prediction errors. Model validation is supported by several methods apart from the display functionality. First, calculation of the RPEM loss function can be performed, using parameters obtained at the end of an identification run. Pole-zero plots can be used to investigate possible overparameterization in the linear dynamic part of the Wiener model. Finally, the static accuracy as a function of the output signal amplitude can be assessed.} } @TechReport{ it:2007-009, author = {Magnus {\AA}gren and Pierre Flener and Justin Pearson}, title = {On Constraint-Oriented Neighbours for Local Search}, institution = it, department = csd, year = 2007, number = {2007-009}, month = mar, abstract = {In the context of local search, we investigate the exploration of constraint-oriented neighbourhoods, where a set of constraints is picked before considering the neighbouring configurations where those constraints may have a different penalty. Given the semantics of a constraint, neighbourhoods consisting only of configurations with decreased (or preserved, or increased) penalty can be represented intensionally as a new attribute for constraint objects. We present a framework for combining neighbourhoods that allows different local search heuristics to be expressed, including multi-phase heuristics where an automatically identifiable suitable subset of the constraints is satisfied upon a first phase and then preserved in a second phase. 
This simplifies the design of local search algorithms compared to using just a variable-oriented neighbourhood, while not incurring any runtime overhead.} } @TechReport{ it:2007-008, author = {Erik B{\"a}ngtsson and Maya Neytcheva}, title = {Finite Element Block-Factorized Preconditioners}, institution = it, department = tdb, year = 2007, number = {2007-008}, month = mar, abstract = {In this work we consider block-factorized preconditioners for the iterative solution of systems of linear algebraic equations arising from finite element discretizations of scalar and vector partial differential equations of elliptic type. For the construction of the preconditioners we utilize a general two-level standard finite element framework and the corresponding block two-by-two form of the system matrix, induced by a splitting of the finite element spaces, referred to as {\em fine} and {\em coarse}, namely, $$ A = \begin{bmatrix} A_{11}&A_{12}\\ A_{21}&A_{22}\end{bmatrix} \begin{matrix}fine,\\ coarse.\end{matrix}. $$ The matrix $A$ admits the exact factorization $$ A = \begin{bmatrix}A_{11}&0\\ {A}_{21}&{S_A}\end{bmatrix} \begin{bmatrix}I_{1}&A_{11}^{-1}A_{12}\\ 0& I_2\end{bmatrix}, $$ where $S_A=A_{22}-A_{21}A_{11}^{-1}A_{12}$ and $I_1$, $I_2$ are identity matrices of corresponding size. The particular form of preconditioners we analyze here is $$ M_{B} = \begin{bmatrix}B_{11}&0\\ {A}_{21}&{S}\end{bmatrix} \begin{bmatrix}I_{1}&Z_{12}\\ 0& I_2\end{bmatrix}, $$ where $S$ is assumed to be some available good quality approximation of the Schur complement matrix $S_A$. We propose two methods to construct an efficient, sparse and computationally cheap approximation $B_{11}^{-1}$ of the inverse of the pivot block $A_{11}^{-1}$, required when solving systems with the block factorized preconditioner $M_B$. 
Furthermore, we propose an approximation $Z_{12}$ of the off-diagonal matrix block product $A_{11}^{-1}A_{12}$, which further reduces the computational complexity of the preconditioning step. All three approximations are based on element-by-element manipulations of local finite element matrices. The approach is applicable for both selfadjoint and non-selfadjoint problems, in two as well as in three dimensions. We analyze in detail the 2D case and provide extensive numerical evidence for the efficiency of the proposed matrix approximations.} } @TechReport{ it:2007-007, author = {Malin Ljungberg}, title = {Composable Difference Operators for Coordinate Invariant Partial Differential Equations}, institution = it, department = tdb, year = 2007, number = {2007-007}, month = feb, abstract = {Computer simulations are a cost efficient complement to laboratory experiments. Software for the solution of partial differential equations is part of the basic infrastructure for the evolving field of Computational Science and Engineering. Numerical analysts are involved in the development of suitable methods and algorithms for the solution of a wide range of partial differential equations. Based on the needs of a numerical analyst, we here identify requirements on software for the solution of partial differential equations. In particular, we look at support for flexible formulations of finite difference methods on curvilinear structured grids. We present FlexOp, a software solution that meets the requirements that have been identified. The semantics of the FlexOp is highly mathematical, and includes coordinate invariant operators. The discretization strategy is specified using parameterized classes, and can be chosen independently for different instances of the same operator. The use of static polymorphism allows for a flexible and efficient implementation of the application of the discretized operator. 
In order to assess the FlexOp, we use them to implement a compact fourth-order Numerov method, using the \textsc{TENGOME} infrastructure as a basis and C++ as the implementation language. We find that the FlexOp are easy to use, because of the high agreement between mathematical derivation and implementation of the method. Compared with a special purpose implementation for a particular discretization scheme, we find that FlexOp offer a significant reduction in the number of lines of code, together with an associated enhanced maintainability. The increased flexibility comes without a cost in the form of an increase of execution time, when compared with the special purpose application. } } @TechReport{ it:2007-006, author = {Anders Berglund and Mattias Wiggberg (eds.)}, title = {Proceedings from the 6th Baltic Sea Conference in Computing Education Research, Koli Calling}, institution = it, department = docs, year = 2007, number = {2007-006}, month = feb, abstract = {The 6th Baltic Sea Conference on Computing Education Research, Koli Calling, aims to promote the exchange of relevant research contributions and practical information between colleagues in the international community of Computing Education Research. Furthermore, Koli Calling aims to combine teaching and learning experiences that have a solid, theoretically anchored research orientation. The conference took place in the inspiring surroundings of the Koli National Park in Eastern Finland, November 9th - 12th 2006, and was organized by Uppsala University, Sweden with local arrangements by University of Joensuu. The submissions were double blind reviewed by the international program committee. 
Accepted papers are published in these final proceedings and are also available in the ACM Digital Library.} } @TechReport{ it:2007-005, author = {Parosh Aziz Abdulla and Ben Henda, Noomene and Richard Mayr and Sven Sandberg and de Alfaro, Luca}, title = {Stochastic Games with Lossy Channels}, institution = it, department = docs, year = 2007, number = {2007-005}, month = feb, note = {Updated 14 December 2007. To appear in the proceedings of FoSSaCS 2008.}, abstract = {We consider turn-based stochastic games on infinite graphs induced by game probabilistic lossy channel systems (GPLCS), the game version of probabilistic lossy channel systems (PLCS). We study games with B{\"u}chi (repeated reachability) objectives and almost-sure winning condition. Under the assumption that the target set is regular, a symbolic representation of the set of winning states for each player can be effectively constructed. Thus, turn-based stochastic games on GPLCS are decidable. This generalizes an earlier decidability result for PLCS-induced Markov decision processes. Our scheme can be adapted to GPLCS with simple reachability objectives.}, oldabstract = {We consider turn-based stochastic games on infinite graphs induced by game probabilistic lossy channel systems (GPLCS), the game version of probabilistic lossy channel systems (PLCS). We study games with B{\"u}chi (repeated reachability) objectives and almost-sure winning condition. Under the condition that the players are limited to finite-memory strategies, a symbolic representation of the set of winning states for each player can be effectively constructed. Thus, finite-memory turn-based stochastic games on GPLCS are decidable. This generalizes a decidability result on finite-memory schedulers for PLCS-induced Markov decision processes in [C. Baier, N. Bertrand, P. Schnoebelen. Verifying nondeterministic probabilistic channel systems against $\omega$-regular linear-time properties. {\it ACM Transactions on Comp. Logic}, 2006. To appear.]. 
Our scheme can be adapted to GPLCS with simple reachability objectives.} } @TechReport{ it:2007-004, author = {Jonas Persson}, title = {Pricing American Options Using a Space-time Adaptive Finite Difference Method}, institution = it, department = tdb, year = 2007, number = {2007-004}, month = jan, abstract = {American options are priced numerically using a space- and time-adaptive finite difference method. The generalized Black-Scholes operator is discretized on a Cartesian structured but non-equidistant grid in space. The space- and time-discretizations are adjusted such that a predefined tolerance level on the local discretization error is met. An operator splitting technique is used to separately handle the early exercise constraint and the solution of linear systems of equations from the finite difference discretization of the linear complementarity problem. In numerical experiments three variants of the adaptive time-stepping algorithm with and without local time-stepping are compared.} } @TechReport{ it:2007-003, author = {Pierre Flener and Justin Pearson and Magnus {\AA}gren and Carlos Garcia Avello and Mete \c{C}eliktin}, title = {Air-Traffic Complexity Resolution in Multi-Sector Planning}, institution = it, department = csd, year = 2007, number = {2007-003}, month = jan, abstract = {Using constraint programming, we effectively model and efficiently solve the problem of balancing and minimising the traffic complexities of an airspace of adjacent sectors. The traffic complexity of a sector is here defined in terms of the numbers of flights within it, near its border, and on non-level segments within it. 
The allowed forms of complexity resolution are the changing of the take-off times of not yet airborne flights, the changing of the remaining approach times into the chosen airspace of already airborne flights by slowing down and speeding up within the two layers of feeder sectors around that airspace, as well as the changing of the levels of passage over way-points in that airspace. Experiments with actual European flight profiles obtained from the Central Flow Management Unit (CFMU) show that these forms of complexity resolution can lead to significant complexity reductions and rebalancing. } } @TechReport{ it:2007-002, author = {Jing Gong and Jan Nordstr{\"o}m}, title = {A Stable and Efficient Hybrid Scheme for Viscous Problems in Complex Geometries}, institution = it, department = tdb, year = 2007, number = {2007-002}, month = jan, abstract = {In this paper we present a stable hybrid scheme for viscous problems. The hybrid method combines the unstructured finite volume method with high-order finite difference methods on complex geometries. The coupling procedure between the two numerical methods is based on energy estimates and stable interface conditions are constructed. Numerical calculations show that the hybrid method is efficient and accurate. } } @TechReport{ it:2006-052, author = {Parosh Aziz Abdulla and Noomene Ben Henda and Giorgio Delzanno and Ahmed Rezine}, title = {Regular Model Checking without Transducers (On Efficient Verification of Parameterized Systems)}, institution = it, department = docs, year = 2006, number = {2006-052}, month = dec, note = {To appear in the proceedings of TACAS 2007}, abstract = {We give a simple and efficient method to prove safety properties for parameterized systems with linear topologies. A process in the system is a finite-state automaton, where the transitions are guarded by both local and global conditions. Processes may communicate via broadcast, rendez-vous and shared variables. 
The method derives an over-approximation of the induced transition system, which allows the use of a simple class of regular expressions as a symbolic representation. Compared to traditional regular model checking methods, the analysis does not require the manipulation of transducers, and hence its simplicity and efficiency. We have implemented a prototype which works well on several mutual exclusion algorithms and cache coherence protocols.} } @TechReport{ it:2006-051, author = {Erik B{\"a}ngtsson and Bj{\"o}rn Lund}, title = {A Comparison Between Two Solution Techniques to Solve the Equations of Linear Isostasy}, institution = it, department = tdb, year = 2006, number = {2006-051}, month = dec, abstract = {In this paper we compare two models to compute the isostatic response of the Earth's lithosphere to an external load. The lithosphere is modeled as a linear elastic solid. The two models differ in the mathematical formulation of the problem, their applicability in the incompressible limit, the choice of the finite elements used for discretization, and the solution strategy for the arising algebraic problem. The efficiency and accuracy of both models are compared via extensive numerical experiments in 2D and 3D.} } @TechReport{ it:2006-050, author = {Iordanis Kavathatzopoulos}, title = {AvI-enk{\"a}ten: Ett verktyg f{\"o}r att m{\"a}ta anv{\"a}ndbarhet, stress och nytta av IT-st{\"o}d}, institution = it, department = hci, year = 2006, number = {2006-050}, month = dec, note = {In Swedish}, abstract = {Syftet med detta arbete {\"a}r att utveckla ett index f{\"o}r att beskriva anv{\"a}ndbarheten hos IT-verktyg och hur anv{\"a}ndbarhet p{\aa}verkar effektivitet. Enk{\"a}ten pr{\"o}vades inom CSN och g{\"a}llde IT-verktyget STIS2000. 498 personer svarade p{\aa} enk{\"a}ten, 68\% av alla anv{\"a}ndare av verktyget. Sammanh{\aa}llningen av enk{\"a}tens delar och fr{\aa}gor {\"a}r tillfredsst{\"a}llande. Tillf{\"o}rlitligheten {\"a}r ocks{\aa} tillfredsst{\"a}llande. 
Enk{\"a}tens struktur och inneh{\aa}ll utg{\"o}r en bra grund f{\"o}r att konstruera ett anv{\"a}ndbarhetsindex. Svaren visar ocks{\aa} att deltagarna {\"a}r ganska positiva n{\"a}r det g{\"a}ller anv{\"a}ndningen av STIS2000 inom CSN.} } @TechReport{ it:2006-049, author = {Stefan Blomkvist}, title = {The User as a Personality: A Reflection on the Theoretical and Practical Use of Personas in {HCI} Design}, institution = it, department = hci, year = 2006, number = {2006-049}, month = nov, abstract = {A persona is a user model that focuses on the individual's goals in interaction. The model resembles classical user profiles, but with some important distinctions. The persona represents patterns of users' behaviour and goals. The concept has a focus on practical interaction design and is not based on a theoretical HCI perspective, although it implies a distinctive perspective on the role of users and interaction. The purpose of this paper is to examine how this concept relates to some theoretical perspectives on users and interaction. One such theory that I discuss is activity theory, which shares the emphasis that interaction is driven by users' motives and goals. But activity theory is a more elaborated framework for studying activities. The conclusion is that personas have a narrower perspective on users and activities compared to activity theory, but this also makes it easier to use as a tool to direct design. Also, the role of the user in the persona approach is blurred with the role as a consumer. The issue of interaction design becomes a matter of satisfying the needs of the consumers, not to improve human work and life. 
} } @TechReport{ it:2006-048, author = {Owe Axelsson and Radim Blaheta and Maya Neytcheva}, title = {Preconditioning of Boundary Value Problems using Elementwise {S}chur Complements}, institution = it, department = tdb, year = 2006, number = {2006-048}, month = nov, abstract = {Based on a particular node ordering and corresponding block decomposition of the matrix we analyse an efficient, algebraic multilevel preconditioner for the iterative solution of finite element discretizations of elliptic boundary value problems. Thereby an analysis of a new version of block-factorization preconditioning methods is presented. The approximate factorization requires an approximation of the arising Schur complement matrix. In this paper we consider such approximations derived by the assembly of the local macro-element Schur complements. The method can be applied also for non-selfadjoint problems but for the derivation of condition number bounds we assume that the corresponding differential operator is selfadjoint and positive definite.} } @TechReport{ it:2006-047, author = {Henrik Johansson and Johan Steensland}, title = {A Performance Characterization of Load Balancing Algorithms for Parallel {SAMR} Applications}, institution = it, department = tdb, year = 2006, number = {2006-047}, month = oct, abstract = {We perform a comprehensive performance characterization of load balancing algorithms for parallel structured adaptive mesh refinement (SAMR) applications. Using SAMR, computational resources are dynamically concentrated to areas in need of a high accuracy. Because of the dynamic resource allocation, the workload must repeatedly be partitioned and distributed over the processors. For an efficient parallel SAMR implementation, the partitioning algorithm must be dynamically selected at run-time with regard to both the application and computer state. We characterize and compare a common partitioning algorithm and a large number of alternative partitioning algorithms. 
The results prove the viability of dynamic algorithm selection and show the benefits of using a large number of complementing partitioning algorithms. } } @TechReport{ it:2006-046, author = {Mei Hong and Torsten S{\"o}derstr{\"o}m and Wei Xing Zheng}, title = {Asymptotic Accuracy Analysis of Bias-Eliminating Least Squares Estimates for Identification of Errors in Variables Systems}, institution = it, department = syscon, year = 2006, number = {2006-046}, month = oct, abstract = {The bias-eliminating least squares (BELS) method is one of the consistent estimators for identifying dynamic errors-in-variables systems. The attraction of the BELS method lies in its good accuracy and its modest computational cost. In this report, we investigate the asymptotic accuracy properties of the BELS estimates. It is shown that the estimated system parameters and the estimated noise variances are asymptotically Gaussian distributed. An explicit expression for the normalized covariance matrix of the estimated parameters is derived and supported by some numerical examples.} } @TechReport{ it:2006-045, author = {Anders Hessel and Paul Pettersson}, title = {Model-Based Testing of a {WAP} Gateway: an Industrial Case-Study}, institution = it, department = docs, year = 2006, number = {2006-045}, month = sep, abstract = {We present experiences from a case study where a model-based approach to black-box testing is applied to verify that a Wireless Application Protocol (WAP) gateway conforms to its specification. The WAP gateway is developed by Ericsson and used in mobile telephone networks to connect mobile phones with the Internet. We focus on testing the software implementing the session (WSP) and transaction (WTP) layers of the WAP protocol. These layers, and their surrounding environment, are described as a network of timed automata. To model the many sequence numbers (from a large domain) used in the protocol, we introduce an abstraction technique. 
We believe the suggested abstraction technique will prove useful to model and analyse other similar protocols with sequence numbers, in particular in the context of model-based testing. A complete test bed is presented, which includes generation and execution of test cases. It takes as input a model and a coverage criterion expressed as an observer, and returns a verdict for each test case. The test bed includes existing tools from Ericsson for test-case execution. To generate test suites, we use our own tool \textsc{Cover} --- a new test-case generation tool based on the real-time model-checker \textsc{Uppaal}.} } @TechReport{ it:2006-044, author = {Mei Hong and Torsten S{\"o}derstr{\"o}m and Johan Schoukens and Rik Pintelon}, title = {Comparison of Time Domain Maximum Likelihood Method and Sample Maximum Likelihood Method in Errors-in-Variables Identification}, institution = it, department = syscon, year = 2006, number = {2006-044}, month = sep, abstract = {The time domain maximum likelihood (TML) method and the Sample Maximum Likelihood (SML) method are two general approaches for identifying errors-in-variables models. In the TML method, an important assumption is that the noise-free input signal must be a stationary process with rational spectrum. For SML, the noise-free input needs to be periodic. In this report, numerical comparisons of these two methods are done under different situations. The results suggest that TML and SML have similar estimation accuracy at moderate or high signal-to-noise ratio (SNR).} } @TechReport{ it:2006-043, author = {Jarmo Rantakokko}, title = {Case-Centered Learning of Scientific Computing}, institution = it, department = tdb, year = 2006, number = {2006-043}, month = sep, abstract = {Traditionally courses in scientific computing teach a large number of methods and algorithms for different mathematical problems. The algorithms are applied on simplified problems and not on real applications. 
The result is that the students can't see the main thread, they focus only on the details of the methods and don't see the entirety. The students can not put what they learn into perspective and their motivation to study becomes diminished. In this paper we suggest a case-centered approach for learning scientific computing. We use a real-life case, weather prediction, as a starting point for learning. The case is analyzed and discussed in class. To follow up the discussions the students are assigned learning tasks in scientific computing defined from the case analysis. The real-life application connects the different topics in scientific computing together and motivates the students. The response from the students has been positive and the case has increased their understanding of what scientific computing is and what it can be used for.} } @TechReport{ it:2006-042, author = {Eddie Wadbro}, title = {On the Far-Field Properties of an Acoustic Horn}, institution = it, department = tdb, year = 2006, number = {2006-042}, month = sep, abstract = {This report presents a derivation of an expression for time harmonic acoustic wave propagation in the far field for two and three space dimensions, and includes detailed descriptions of the numerical evaluation of the far-field pattern in some typical situations. The presentation covers all parts required for computing the far-field properties of acoustical devices, and the report is designed to function as a single reference for these computations.} } @TechReport{ it:2006-041, author = {Markus Nord{\'e}n}, title = {Performance Modelling for Parallel {PDE} Solvers on {NUMA}-Systems}, institution = it, department = tdb, year = 2006, number = {2006-041}, month = aug, abstract = {A detailed model of the memory performance of a PDE solver running on a NUMA-system is set up. Due to the complexity of modern computers, such a detailed model inevitably is very complicated. 
Therefore, approximations are introduced that simplify the model and allow NUMA-systems and PDE solvers to be described conveniently. Using the simplified model, it is shown that PDE solvers using ordered local methods can be made very insensitive to high NUMA-ratios, allowing them to scale well on virtually any NUMA-system. PDE solvers using unordered local methods, semiglobal methods or global methods are more sensitive to high NUMA-ratios and require special techniques in order to scale well beyond a single locality group. Nevertheless, the potential performance gain of improving the data distribution on a NUMA-system can be considerable for all kinds of PDE solvers studied. } } @TechReport{ it:2006-040, author = {Mei Hong and Torsten S{\"o}derstr{\"o}m and Wei Xing Zheng}, title = {A Simplified Form of the Bias-Eliminating Least Squares Method for Errors-In-Variables Identification}, institution = it, department = syscon, year = 2006, number = {2006-040}, month = aug, abstract = {This report considers the bias-eliminating least squares (BELS) method for identifying the errors-in-variables systems with white input noise and colored output noise. A simplified form of the BELS algorithm is proposed which is proved to be equivalent to the existing one. The new relation is a form of linear IV equations which will not only reduce the computational load but also simplify the analysis of the properties of the BELS estimates.} } @TechReport{ it:2006-039, author = {Andreas Hellander and Per L{\"o}tstedt}, title = {Hybrid Method for the Chemical Master Equation}, institution = it, department = tdb, year = 2006, number = {2006-039}, month = aug, abstract = {The chemical master equation is solved by a hybrid method coupling a macroscopic, deterministic description with a mesoscopic, stochastic model. 
The molecular species are divided into one subset where the expected values of the number of molecules are computed and one subset with species with a stochastic variation in the number of molecules. The macroscopic equations resemble the reaction rate equations and the probability distribution for the stochastic variables satisfy a master equation. The probability distribution is obtained by the Stochastic Simulation Algorithm due to Gillespie. The equations are coupled via a summation over the mesoscale variables. This summation is approximated by Monte Carlo and Quasi Monte Carlo methods. The error in the approximations is analyzed. The hybrid method is applied to three chemical systems from molecular cell biology.} } @TechReport{ it:2006-038, author = {Markus Nord{\'e}n and Henrik L{\"o}f and Jarmo Rantakokko and Sverker Holmgren}, title = {Geographical Locality and Dynamic Data Migration for {OpenMP} Implementations of Adaptive {PDE} Solvers}, institution = it, department = tdb, year = 2006, number = {2006-038}, month = aug, note = {To appear in \emph{Proceedings of the 2:nd International Workshop on OpenMP (IWOMP)}}, abstract = {On cc-NUMA multi-processors, the non-uniformity of main memory latencies motivates the need for co-location of threads and data. We call this special form of data locality, \emph{geographical locality}. In this article, we study the performance of a parallel PDE solver with adaptive mesh refinement. The solver is parallelized using OpenMP and the adaptive mesh refinement makes dynamic load balancing necessary. Due to the dynamically changing memory access pattern caused by the runtime adaption, it is a challenging task to achieve a high degree of geographical locality. 
The main conclusions of the study are: (1) that geographical locality is very important for the performance of the solver, (2) that the performance can be improved significantly using dynamic page migration of misplaced data, (3) that a migrate-on-next-touch directive works well whereas the first-touch strategy is less advantageous for programs exhibiting a dynamically changing memory access pattern, and (4) that the overhead for such migration is low compared to the total execution time.} } @TechReport{ it:2006-037, author = {Elisabeth Larsson and Krister {\AA}hlander and Andreas Hall}, title = {Multi-Dimensional Option Pricing using Radial Basis Functions and the Generalized {F}ourier Transform}, institution = it, department = tdb, year = 2006, number = {2006-037}, month = aug, abstract = {We show that the generalized Fourier transform can be used for reducing the computational cost and memory requirements of radial basis function methods for multi-dimensional option pricing. We derive a general algorithm, including a transformation of the Black--Scholes equation into the heat equation, that can be used in any number of dimensions. Numerical experiments in two and three dimensions show that the gain is substantial even for small problem sizes. Furthermore, the gain increases with the number of dimensions.} } @TechReport{ it:2006-036, author = {Stefan Engblom}, title = {A Discrete Spectral Method for the Chemical Master Equation}, institution = it, department = tdb, year = 2006, number = {2006-036}, month = jun, note = {Superseded by report nr 2008-005}, abstract = {As an equivalent formulation of the Markov-assumption of stochastic processes, the master equation of chemical reactions is an accurate description of general systems in chemistry. For $D$ reacting species this is a differential-difference equation in $D$ dimensions, exactly soluble for very simple systems only. 
We present and analyze a novel solution strategy in the form of a Galerkin spectral method with an inherent natural adaptivity and a very favorable choice of basis functions. The method is exemplified by the numerical solution of two systems taken from molecular biology. It is shown that the method remains effective and accurate when other traditional solution methods produce less useful results. } } @TechReport{ it:2006-035, author = {Owe Axelsson and Janos Kar{\'a}tson}, title = {Mesh Independent Superlinear {PCG} Rates via Compact-Equivalent Operators}, institution = it, department = tdb, year = 2006, number = {2006-035}, month = jun, abstract = {The subject of the paper is the mesh independent convergence of the preconditioned conjugate gradient method for nonsymmetric elliptic problems. The approach of equivalent operators is involved, in which one uses the discretization of another suitable elliptic operator to construct a preconditioning matrix. By introducing the notion of compact-equivalent operators, it is proved that for a wide class of elliptic problems the superlinear convergence of the obtained PCGM is mesh independent under FEM discretizations, that is, the rate of superlinear convergence is given in the form of a sequence which is mesh independent and is determined only by the elliptic operators.} } @TechReport{ it:2006-034, author = {Agnes Rensfelt and Torsten S{\"o}derstr{\"o}m}, title = {Optimal Excitation for Nonparametric Identification of Viscoelastic Materials}, institution = it, department = syscon, year = 2006, number = {2006-034}, month = jun, abstract = {The problem of optimal excitation in nonparametric identification of viscoelastic materials is considered. The goal is to design the input spectrum in an optimal way, so that the average variance of the estimates is minimized. It is shown how the covariance matrix of the estimates can be expressed in terms of the input spectrum. 
This theory can also be used in order to identify the (unknown) excitation, used in a particular experiment, from measured strain data. Two scalar criteria connected to A- and D-optimal experiment design, are considered. The results indicate that the accuracy of the estimates can be greatly improved by applying an optimal input signal. Issues concerning the implementation of the achieved optimal input spectrum in live experiments are discussed briefly.} } @TechReport{ it:2006-033, author = {Parosh Aziz Abdulla and Noomene Ben Henda and Richard Mayr and Sven Sandberg}, title = {Limiting Behavior of {M}arkov Chains with Eager Attractors}, institution = it, department = docs, year = 2006, number = {2006-033}, month = jun, abstract = {We consider discrete infinite-state Markov chains which contain an eager finite attractor. A finite attractor is a finite subset of states that is eventually reached with probability 1 from every other state, and the eagerness condition requires that the probability of avoiding the attractor in $n$ or more steps after leaving it is exponentially bounded in $n$. Examples of such Markov chains are those induced by probabilistic lossy channel systems and similar systems. We show that the expected residence time (a generalization of the steady state distribution) exists for Markov chains with eager attractors and that it can be effectively approximated to arbitrary precision. Furthermore, arbitrarily close approximations of the limiting average expected reward, with respect to state-based bounded reward functions, are also computable. 
} } @TechReport{ it:2006-032, author = {Torsten S{\"o}derstr{\"o}m}, title = {Extending the {F}risch Scheme for Errors-in-Variables Identification to Correlated Output Noise}, institution = it, department = syscon, year = 2006, number = {2006-032}, month = jun, abstract = {Several estimation methods have been proposed for identifying errors-in-variables systems, where both input and output measurements are corrupted by noise. One of the promising approaches is the so called Frisch scheme. In its standard form it is designed to handle white measurement noise on the input and output sides. As the output noise comprises both effects of measurement errors and of process disturbances, it is much more realistic to allow correlated output noise. It is described in the paper how the Frisch scheme can be extended to such cases. } } @TechReport{ it:2006-031, author = {H{\aa}kan Zeffer and Erik Hagersten}, title = {A Case for Low-Complexity Multi-{CMP} Architectures}, institution = it, department = docs, year = 2006, number = {2006-031}, month = jun, abstract = {The advances in semiconductor technology have set the shared memory server trend towards processors with multiple cores per die and multiple threads per core. This paper presents simple hardware primitives enabling flexible and low complexity multi-chip designs supporting an efficient inter-node coherence protocol run in software. The design is based on two node permission bits per cache line and a new way to decouple the intra-chip coherence protocol from the inter-node coherence protocol. The protocol implementation enables the system to cache remote data in the local memory system with no additional hardware support. Our evaluation is based on detailed full system simulation of both commercial and HPC workloads. 
We compare a low-complexity system based on the proposed primitives with aggressive hardware multi-chip shared-memory systems and show that the performance is competitive, and often better, across a large design space.} } @TechReport{ it:2006-030, author = {Lars-Henrik Eriksson}, title = {The {GTO} Toolset and Method}, institution = it, department = csd, year = 2006, number = {2006-030}, month = jun, abstract = {A suitable method supported by a toolset with a high degree of automation is a necessity for the successful employment of formal methods in industrial projects. The GTO toolset and method have been developed, and successfully applied, to formal methods in safety-critical control applications related to railway signalling since the mid 1990s. The toolset and method support the entire formal methods process from writing and validating formal specifications, through modelling of the implementation to formal verification and analysis of verification results. One goal of the toolset and method was to make formal methods more competitive by streamlining the process so that -- at least within an established application area -- individual verification tasks could be done in an ``assembly line''-like fashion with minimum overhead. In line with this goal, the toolset is intended for use with configurable systems, where a generic specification is applicable to a family of systems and adapted to a specific system using configuration data. The functions carried out by the toolset include static checking and simulation of specifications, checking of configuration data, generation of implementation models from PLC program code or relay schematics, simulation of the implementation model, formal verification by refinement proof, and analysis of failed refinement proofs. Refinement proofs are automatically carried out by a satisfiability (SAT) solver of the user's choice, which is interfaced to the main tool. 
We will outline the method and functions of the toolset as well as the formal notation -- a simple temporal predicate logic -- used by the toolset.} } @TechReport{ it:2006-029, author = {Lars-Henrik Eriksson}, title = {Use of Domain Theories in Applied Formal Methods}, institution = it, department = csd, year = 2006, number = {2006-029}, month = jun, abstract = {A formal theory of an application domain can serve a key role in formal specification and verification of systems operating in that domain. This is particularly relevant when working with families of similar systems intended to control some kind of industrial process or function where the control principles are general while the specifics of the system depend on the particular installation the system is intended to control. In such situations a domain theory can facilitate writing generic specifications or implementations for the entire family of systems which can then be configured for each particular installation. Use of concepts defined in the domain theory can also facilitate communication with domain experts without knowledge in formal methods. An example of such a family is railway signalling systems which all implement the same general signalling principles of ensuring safe train operation, while the exact function of a particular signalling system depends on the railway track layout under its control. We will give concrete examples from industrial practice in the railway domain, showing how domain theories can help in the formal verification process. 
The examples include writing and validating formal specifications, formally verifying implementations and analysing and communicating the results of failed verifications.} } @TechReport{ it:2006-028, author = {Ulrika Pettersson and Elisabeth Larsson and Gunnar Marcusson and Jonas Persson}, title = {Improved Radial Basis Function Methods for Multi-Dimensional Option Pricing}, institution = it, department = tdb, year = 2006, number = {2006-028}, month = may, abstract = {In this paper, we have derived a radial basis function (RBF) based method for the pricing of financial contracts by solving the Black-Scholes partial differential equation. As an example of a financial contract that can be priced with this method we have chosen the multi-dimensional European basket call option. We have shown numerically that our scheme is second order accurate in time and spectrally accurate in space for constant shape parameter. For other, non-optimal choices of shape parameter values, the resulting convergence rate is algebraic. We propose an adaptive node point placement that improves the accuracy compared with a uniform distribution. Compared with an adaptive finite difference method, the RBF method is 20-40 times faster in one and two space dimensions and has approximately the same memory requirements.} } @TechReport{ it:2006-027, author = {Jan Nordstr{\"o}m}, title = {Error Bounded Schemes for Time-Dependent Hyperbolic Problems}, institution = it, department = tdb, year = 2006, number = {2006-027}, month = may, abstract = {In this paper we address the error growth in time for hyperbolic problems on first order form. The energy method is used to study when an error growth or a fixed error bound is obtained. It is shown that the choice of boundary procedure is a crucial point. 
Numerical experiments corroborate the theoretical findings.} } @TechReport{ it:2006-026, author = {Jan Nordstr{\"o}m and Ken Mattsson and Charles Swanson}, title = {Boundary Conditions for a Divergence Free Velocity-Pressure Formulation of the Incompressible {N}avier-{S}tokes Equations}, institution = it, department = tdb, year = 2006, number = {2006-026}, month = may, abstract = {New sets of boundary conditions for the velocity-pressure formulation of the incompressible Navier-Stokes equations are derived. The boundary conditions have the same form on both inflow and outflow boundaries and lead to a divergence free solution. Moreover, the specific form of the boundary conditions makes it possible to derive a symmetric positive definite equation system for the internal pressure. Numerical experiments support the theoretical conclusions. } } @TechReport{ it:2006-025, author = {Johan Wikstr{\"o}m and Arvid Kauppi and Arne W. Andersson and Bengt Sandblad}, title = {Designing a Graphical User Interface for Train Traffic Control}, institution = it, department = hci, year = 2006, number = {2006-025}, abstract = {In 1996, on initiative of the Swedish National Rail Administration, a research study was initiated by the department of Human Computer Interaction at Uppsala University with the aim to learn more about the problems and difficulties involved in train traffic control. As a result of this study, and the developing situation with higher speeds, more frequent traffic, and many competing train traffic operators, a research project was initiated. The purpose was to find new strategies and technical solutions for future train traffic control. Modern research on human-computer interaction in complex and dynamic systems provided a framework for how to design an interface meeting these demands. Important aspects concern e.g. 
workload, situation awareness and automated cognitive processes, limitations in human memory capacity, cognitive work environment problems, human error performance and dynamic decision processes. Throughout the research a user centered approach has been applied. The new proposed interface is designed to integrate all decision relevant information into one unified interface and to support a continuous awareness of the dynamic development of the traffic process. A prototype of a new train traffic control interface has been implemented in close collaboration with active train dispatchers. Early and promising in-house tests have been made using the prototype described in this paper. More extensive case studies and experiments need to be conducted before a complete evaluation can be made.} } @TechReport{ it:2006-024, author = {Arvid Kauppi and Johan Wikstr{\"o}m and Bengt Sandblad and Arne W. Andersson}, title = {Control Strategies for Managing Train Traffic, Difficulties Today and Solutions for the Future}, institution = it, department = hci, year = 2006, number = {2006-024}, month = may, abstract = {In 1996, on initiative from the Swedish National Rail Administration, the department of Human-Computer Interaction at the institute for Information Technology, Uppsala University initiated a research project with the objective to identify the difficulties present in today's train traffic control and to find solutions to those problems, if possible. This paper describes the strategy used to control train traffic in Sweden today. Problems and difficulties inherited from the use of the current control strategies and systems are presented. With the goal to solve these problems, and aid the human operator in their work, solutions for new principles for control and a new control strategy are proposed - control by re-planning. 
The proposed control strategy is designed to support the train dispatcher to work in a more preventive manner and thereby avoiding potential disturbances in traffic when possible. The focus of control tasks will be shifted from controlling infrastructure on a technical level to focus more on a higher level of controlling the traffic flow through re-planning tasks. The new control strategy in combination with a new approach to automation, higher availability of decision relevant information and new graphical user interfaces addresses many of the issues and problems found in the control environment today.} } @TechReport{ it:2006-023, author = {Gergana Bencheva and Svetozar Margenov and Ji\v{r}\'{\i} Star\'{y}}, title = {{MPI} Implementation of a {PCG} Solver for Nonconforming {FEM} Problems: Overlapping of Communications and Computations}, institution = it, year = 2006, number = {2006-023}, month = may, abstract = {New theoretical and experimental results concerning a recently introduced parallel preconditioner for the solution of large nonconforming Finite Element linear systems are presented. The studied algorithm is based on the modified incomplete Cholesky factorization MIC(0) applied to a locally constructed approximation of the original stiffness matrix. The overlapping of communications and computations is possible due to a suitable reordering of the computations applied in the MPI code. Theoretical estimates for the execution time of the modified algorithm are derived. 
The obtained improvement of the real performance is illustrated by numerical tests on a Beowulf-type Linux cluster, on a Sun symmetric multiprocessor and on an SGI Altix supercluster.} } @TechReport{ it:2006-022, author = {Niclas Sandgren and Petre Stoica}, title = {On Moving Average Parameter Estimation}, institution = it, department = syscon, year = 2006, number = {2006-022}, month = apr, abstract = {Estimation of the autoregressive moving average (ARMA) parameters of a stationary stochastic process is a problem often encountered in the signal processing literature. It is well known that estimating the moving average (MA) parameters is usually more difficult than estimating the autoregressive (AR) part, especially if the zeros are located close to the unit circle. In this paper we present four linear methods for MA parameter estimation (i.e., methods that involve only linear operations) and compare their performances first in a case when the zeros are located far away from the unit circle and secondly in a presumably harder case when the zeros are located very close to the unit circle.} } @TechReport{ it:2006-021, author = {Gunilla Linde and Jonas Persson and von Sydow, Lina}, title = {High-Order Adaptive Space-Discretizations for the {B}lack--{S}choles Equation}, institution = it, department = tdb, year = 2006, number = {2006-021}, month = apr, abstract = {In this paper we develop a high-order adaptive finite difference space-discretization for the Black--Scholes (B--S) equation. The final condition is discontinuous in the first derivative yielding that the effective rate of convergence is two, both for low-order and high-order standard finite difference (FD) schemes. To obtain a sixth-order scheme we use an extra grid in a limited space- and time-domain. The new sixth-order method is called FD6G2. The FD6G2-method is combined with space- and time-adaptivity to further enhance the method. 
To obtain solutions of high accuracy in several dimensions the adaptive FD6G2-method is superior to both standard and adaptive second-order FD-methods.} } @TechReport{ it:2006-020, author = {Nicolas Beldiceanu and Pierre Flener and Xavier Lorca}, title = {Combining Tree Partitioning, Precedence, Incomparability, and Degree Constraints, with an Application to Phylogenetic and Ordered-Path Problems}, institution = it, department = csd, year = 2006, number = {2006-020}, month = apr, abstract = {The \emph{tree} and \emph{path} constraints, for digraph partitioning by vertex disjoint trees and paths respectively, are unified within a single global constraint, including a uniform treatment of a variety of useful side constraints, such as precedence, incomparability, and degree constraints. The approach provides a sharp improvement over an existing \emph{path} constraint, but can also efficiently handle tree problems, such as the phylogenetic supertree construction problem. The key point of the filtering is to take partially into account the strong interactions between the tree partitioning problem and all the side constraints.} } @TechReport{ it:2006-019, author = {Jing Gong and Jan Nordstr{\"o}m}, title = {Stable, Accurate and Efficient Interface Procedures for Viscous Problems}, institution = it, department = tdb, year = 2006, number = {2006-019}, month = apr, abstract = {In this paper we investigate and study several different interface procedures for finite difference methods applied to viscous problems. The analysis shows that stable, accurate and efficient interface procedures can be obtained. 
The analysis also shows that only minor differences of the various methods exist.} } @TechReport{ it:2006-018, author = {Dan Wallin and Henrik L{\"o}f and Erik Hagersten and Sverker Holmgren}, title = {Multigrid and Gauss-Seidel Smoothers Revisited: Parallelization on Chip Multiprocessors}, institution = it, year = 2006, number = {2006-018}, month = apr, abstract = {Efficient solutions of partial differential equations require a match between the algorithm and the underlying architecture. The new chip-multiprocessors, CMPs (a.k.a. multicore), feature low intra-chip communication cost and smaller per-thread caches compared to previous systems. From an algorithmic point of view this means that data locality issues become more important than communication overheads. This may require re-evaluation of many existing algorithms. We have investigated parallel implementations of multigrid methods using a temporally blocked, naturally ordered, smoother implementation. Compared with the standard multigrid solution based on the two-color red-black algorithm, we improve the data locality often as much as ten times, while our use of a fine-grained locking scheme keeps the parallel efficiency high. While our algorithm initially was inspired by CMPs, it was surprising to see our OpenMP multigrid implementation run up to 40 percent faster than the standard red-black algorithm on an 8-way SMP system. Studying the smoother part of the algorithm in isolation often shows it performing two iterations at the same time as a single iteration with an ordinary red-black smoother. 
Running our smoother on a 32-thread UltraSPARC T1 (Niagara) SMT/CMP and a simulated 32-way CMP demonstrates the communication cost of our algorithm to be low on such architectures.} } @TechReport{ it:2006-017, author = {Andr{\'e} Yamba Yamba and Krister {\AA}hlander and Malin Ljungberg}, title = {Designing for Geometrical Symmetry Exploitation}, institution = it, department = tdb, year = 2006, number = {2006-017}, month = apr, abstract = {Symmetry exploiting software based on the generalized Fourier transform (GFT) is presented from a practical design point of view. The algorithms and data structures map closely to the relevant mathematical abstractions, which primarily are based upon representation theory for groups. Particular care has been taken in the design of the data layout of the performance sensitive numerical data structures. The use of a vanilla strategy is advocated for the design of flexible mathematical software libraries: An efficient general-purpose routine should be supplied, to obtain a practical and useful system, while the possibility to extend the library and replace the default routine with a special-purpose---even more optimized---routine should be supported. Compared with a direct approach, the performance results show the superiority of the GFT based approach for so-called dense equivariant systems. The application is found to be well suited for parallelism.} } @TechReport{ it:2006-016, author = {Johan Petrini and Tore Risch}, title = {Scalable {RDF} Views of Relational Databases through Partial Evaluation}, institution = it, department = csd, year = 2006, number = {2006-016}, month = mar, note = {Updated April 2006}, abstract = {The semantic web represents meta-data as a triple relation using the RDF data model. We have developed a system to process queries to RDF views of entire relational databases. 
Optimization of queries to such views is challenging because i) RDF views of entire relational databases become large unions, and ii) queries to the views are more general than relational database queries, making no clear distinction between data and schema. As queries need not be expressed in terms of a schema, it becomes critical to optimize not only data access time but also the time to perform the query optimization itself. We have developed novel query optimization techniques for scalable queries to RDF views of relational databases. Our optimization techniques are based on partial evaluation, a method for compile time evaluation of subexpressions. We show dramatic improvements in query optimization time when scaling the query size while still producing high quality execution plans. Our query optimization techniques enable execution of real-world queries to RDF views of relational databases.} } @TechReport{ it:2006-015, author = {Jonas Persson and Jonatan Eriksson}, title = {Pricing Turbo Warrants}, institution = it, department = tdb, year = 2006, number = {2006-015}, month = mar, abstract = {We numerically price the financial contracts named turbo warrant that were released early in 2005. They have been studied mathematically in \cite{Eriksson05} where explicit pricing formulas for the Geometric Brownian motion were derived. For more general underlying stochastic processes we have no analytical formulas and numerical methods are necessary. In this work two different methods are compared, stochastic pricing using a Monte Carlo method and a deterministic PDE approach using finite differences. The methods are evaluated in terms of numerical efficiency, computation time and accuracy. In the numerical experiments the geometric Brownian motion has been used as underlying stochastic process. 
Our results show that for low accuracy the methods are almost equal in efficiency but for higher accuracy the finite difference method is much more efficient.} } @TechReport{ it:2006-014, author = {Amoignon, O.}, title = {Moving Mesh Adaptation for Aerodynamic Shape Optimization}, institution = it, department = tdb, year = 2006, number = {2006-014}, month = mar, abstract = {A method of mesh adaptation is proposed for gradient-based aerodynamic shape optimization. The method consists in coupling an equation for the mesh node coordinates with the discretized Euler equations of gas dynamics in steady state. The variational mesh equation is inspired by Winslow's variable diffusion mapping. The system of mesh and flow equations is solved, instead of the flow equations alone, when performing shape optimization. The solution algorithm of the coupled equations is an approximate Newton method supplemented with an interpolation of the variable diffusivity by radial basis functions. Tests are carried out for supersonic flow over a wedge, a problem that is used here as a benchmark for the mesh adaptation and for a simple problem of inverse design. At a given design, the method of adaptation improves the accuracy of the calculated drag, a functional that is used in the construction of the inverse problem. The accuracy of the shape, obtained by inverse design, experiences similar improvements due to the mesh adaptation scheme.} } @TechReport{ it:2006-013, author = {Amoignon, O. and Berggren, M.}, title = {Adjoint of a Median-Dual Finite-Volume Scheme Applied to {2D} and {3D} Transonic Aerodynamic Shape Optimization}, institution = it, department = tdb, year = 2006, number = {2006-013}, month = mar, abstract = {The sensitivity analysis is a crucial step in algorithms for gradient-based aerodynamic shape optimization. The analysis involves computing the gradient of functionals such as drag, lift, or aerodynamic moments, with respect to the parameters of the design. 
Gradients are efficiently calculated by solving adjoints of the linearized flow equations. The flow is modeled by the Euler equations of gas dynamics, solved in Edge, a Computational Fluid Dynamics (CFD) code for unstructured meshes. The adjoint equations and expressions for the gradients are derived here in the fully discrete case, that is, the mappings from the design variables to the functional's values involve the discretized flow equations, a mesh deformation equation, and the parameterization of the geometry. We present a formalism and basic properties that enable a compact derivation of the adjoint for discretized flow equations obeying an edge-based structure, such as the vertex-centered median-dual finite volume discretization implemented in Edge. This approach is applied here to the optimization of the RAE~2822 airfoil and the ONERA~M6 wing. In particular, we show a method to parameterize the shape, in 2D, in order to enforce smoothness and linear geometrical constraints.} } @TechReport{ it:2006-012, author = {Marianne Ahlford and Martin Andersson and Hanna Blomquist and Magnus Ekstr{\"o}m and Lars Ericsson and Johannes Gumbel and Anna Holmgren and Petter Holmberg and Leonard Kia and Anton Lindstr{\"o}m and Magnus Lyrberg and Shaman Mahmoudi and Bengt Nolin and Jesper Sundien and Henrik Wallentin and Olle G{\"a}llmo and Anders Hessel and Leonid Mokrushin and Paul Pettersson}, title = {Uppsala {U}nderdogs - A Robot Soccer Project}, institution = it, department = docs, year = 2006, number = {2006-012}, month = mar, abstract = {In this paper, we describe the four-legged soccer team Uppsala Underdogs developed by a group of 4th year computer science students at Uppsala University during the fall of 2004. The project is based on the experience from two similar previous projects. This year the emphasis of the project has been on distribution of data and on support for evaluation and reconfiguration of strategies. 
To support data distribution, a middleware has been developed, which implements a replication algorithm and provides a clean interface for the other software modules (or behaviors). To enable easy reconfiguration of strategies, an automata-based graphical description language has been developed, which can be compiled into code that uses the database and the lower level modules, such as tactics and positioning, to make decisions and control the robot. In addition, a graphical simulator has been developed in which the strategies can be evaluated.} } @TechReport{ it:2006-011, author = {Bahram Bahar and Adam Bolcsfoldi and Jonas Falkevik and Roger Jakobsen and Kristoer Kobosko and Jimmy Kallstrom and Alexander Harju and Andreas Hasselberg and Johan Persson and Mattias Wadman and Olle G{\"a}llmo and Anders Hessel and Leonid Mokrushin and Paul Pettersson}, title = {Developing {A-GPS} as a Student Project}, institution = it, department = docs, year = 2006, number = {2006-011}, month = mar, abstract = {The project consists of ten fourth-year computer science students at Uppsala University developing an A-GPS (Assisted-GPS) system. During the fall term of 2005 the students have developed a module for GPS-calculations in a GSM-network and an application that demonstrates a possible way of using the calculations module. 
This paper describes the design, the development process and the results of the project.} } @TechReport{ it:2006-010, author = {Mattias Andreasson and Andr{\'e}e Bylund and Syrus Dargahi and Daniel Johansson and Martin Larsson and Bennie Lundmark and Jonas Mellberg and Fredrik Stenh and Olle G{\"a}llmo and Anders Hessel and Leonid Mokrushin and Paul Pettersson}, title = {Project {A}vatar - Developing a Distributed Mobile Phone Game}, institution = it, department = docs, year = 2006, number = {2006-010}, month = mar, note = {Online copy corrected April 7, 2006}, abstract = {Team Avatar, as the members of Project Avatar have come to be known by, is a group of 4th year computer science students at Uppsala University that have been developing a distributed mobile phone game during the fall of 2005. In this paper we describe the general design and environment of the result of Project Avatar -- the game Three Crowns.} } @TechReport{ it:2006-009, author = {Parosh Aziz Abdulla and Noomene Ben Henda and Richard Mayr and Sven Sandberg}, title = {Eager Markov Chains}, institution = it, department = docs, year = 2006, number = {2006-009}, month = mar, abstract = {We consider infinite-state discrete Markov chains which are \emph{eager}: the probability of avoiding a defined set of final states for more than $n$ steps decreases exponentially in $n$. We study the problem of computing the expected reward (or cost) of runs until reaching the final states, where rewards are assigned to individual runs by computable reward functions. We present a path exploration scheme, based on forward reachability analysis, to approximate the expected reward up-to an arbitrarily small error, and show that the scheme is guaranteed to terminate in the case of eager Markov chains. 
We show that eager Markov chains include those induced by Probabilistic Vector Addition Systems with States, Noisy Turing Machines, and Probabilistic Lossy Channel Systems.} } @TechReport{ it:2006-008, author = {Pavel Krcal and Wang Yi}, title = {Communicating Timed Automata: The More Synchronous, the More Difficult to Verify}, institution = it, department = docs, year = 2006, number = {2006-008}, month = mar, abstract = {We study channel systems whose behaviour (sending and receiving messages via unbounded FIFO channels) must follow given timing constraints specifying the execution speeds of the local components. We propose Communicating Timed Automata (CTA) to model such systems. The goal is to study the borderline between decidable and undecidable classes of channel systems in the timed setting. Our technical results include: (1) CTA with one channel without shared states in the form $(A_1,A_2, c_{1,2})$ is equivalent to one-counter machine, implying that verification problems such as checking state reachability and channel boundedness are decidable, and (2) CTA with two channels without sharing states in the form $(A_1,A_2,A_3, c_{1,2},c_{2,3})$ has the power of Turing machines. Note that in the untimed setting, these systems are no more expressive than finite state machines. We show that the capability of synchronizing on time makes it substantially more difficult to verify channel systems.} } @TechReport{ it:2006-007, author = {Stefan Engblom}, title = {Gaussian Quadratures with Respect to Discrete Measures}, institution = it, department = tdb, year = 2006, number = {2006-007}, month = feb, abstract = {In analogy to the subject of Gaussian integration formulas we present an overview of some Gaussian \emph{summation} formulas. The derivation involves polynomials that are orthogonal under discrete inner products and the resulting formulas are useful as a numerical device for summing fairly general series. 
Several illuminating examples are provided in order to present various aspects of this not very well-known technique.} } @TechReport{ it:2006-006, author = {Hans Hansson (editor)}, title = {ARTES - A Network for Real-Time Research and Graduate Education in Sweden 1997--2006}, institution = it, department = docs, year = 2006, number = {2006-006}, note = {Also published as report MDH-MRTC-197/2006-1-SE, M{\"a}lardalens H{\"o}gskola.}, abstract = {This book summarizes the results of the Swedish national real-time systems research initiative ARTES and provides a few representative examples of the science and scientific results that have emerged from ARTES. ARTES was supported by the Swedish Foundation for Strategic Research (SSF), with a total of 95 MSEK between 1998 and 2006. ARTES has unified and given strength to the Swedish real-time and embedded systems research community, and contributed substantially to advancing Sweden's international position in this area. ARTES has had a catalytic and coordinating effect for a total research effort extending far beyond the funding provided by SSF. It has created important synergies between disciplines, ensured industrial relevance in research, and facilitated important academic and industrial networking for approximately 100 senior researchers and some 200 post-graduate students.} } @TechReport{ it:2006-005, author = {Magnus {\AA}gren and Pierre Flener and Justin Pearson}, title = {Inferring Variable Conflicts for Local Search from High-Level Models}, institution = it, department = csd, year = 2006, number = {2006-005}, month = feb, note = {Updated March 2006}, abstract = {For efficiency reasons, neighbourhoods in local search algorithms are often shrunk by only considering moves modifying variables that actually contribute to the overall penalty. These are known as conflicting variables. This is a well-known technique for speeding up search. 
State-of-the-art solutions to, e.g., the progressive party problem exploit this with great success. We propose a way of automatically and incrementally measuring the conflict of a variable in a local search model and apply this to the set variables of models expressed in existential second-order logic extended with counting (ESOL+). Furthermore, we show that this measure is lower-bounded by an intuitive conflict measure, and upper-bounded by the penalty of the model. We also demonstrate the usefulness of the approach by replacing a built-in global constraint by a modelled ESOL+ version thereof, while still obtaining competitive results. This is especially attractive when a particular (global) constraint is not built in.} } @TechReport{ it:2006-004, author = {Owe Axelsson and Janos Karatson}, title = {Symmetric Part Preconditioning of the {CG} for {S}tokes Type Saddle-Point Systems}, institution = it, department = tdb, year = 2006, number = {2006-004}, month = feb, abstract = {A nonsymmetric formulation of saddle-point systems is considered and symmetric part preconditioning of the conjugate gradient method (CGM) is applied. Linear and superlinear convergence estimates are derived for the finite element solution of the Stokes problem and of Navier's equations of elasticity.} } @TechReport{ it:2006-003, author = {Fredrik Bergholm}, title = {Intermittent Open Boundaries for Shallow Water Equations}, institution = it, department = tdb, year = 2006, number = {2006-003}, month = feb, abstract = {Shallow water equations are used in oceanography, acoustics and electromagnetics. This report (to be submitted as article) focuses on quickly moving shallow water waves in oceanography (also Coriolis force). In wave propagation modelling, it is very often attractive to split up a numerical model into smaller pieces. For barotropic wave modelling, in oceanography, this is almost a necessity. Wave propagation in a submodel communicates with adjacent region(s) by open boundaries. 
An open boundary is always a half measure, but there are interesting ways of making the open boundaries ``softer'' -- a kind of half-open boundaries. In this article, we investigate the usefulness of having \emph{intermittent open boundaries}. They tend to produce fewer errors, and may be analyzed analytically, also. The basic principle is to have both an open boundary condition and a suitable so-called transition condition.} } @TechReport{ it:2006-002, author = {Torsten S{\"o}derstr{\"o}m}, title = {Statistical Analysis of the {F}risch Scheme for Identifying Errors-in-Variables Systems}, institution = it, year = 2006, number = {2006-002}, month = jan, abstract = {Several estimation methods have been proposed for identifying errors-in-variables systems, where both input and output measurements are corrupted by noise. One of the promising approaches is the so called Frisch scheme. This paper provides an accuracy analysis of the Frisch scheme applied to system identification. The estimates of the system parameters and the noise variances are shown to be asymptotically Gaussian distributed. An explicit expression for the covariance matrix of the asymptotic distribution is given as well. Numerical simulations support the theoretical results. A comparison with the Cramer-Rao lower bound is also given in examples, and it is shown that the Frisch scheme gives a performance close to the Cramer-Rao bound for large signal-to-noise ratios. } } @TechReport{ it:2006-001, author = {Lars Ferm and Per L{\"o}tstedt}, title = {Numerical Method for Coupling the Macro and Meso Scales in Stochastic Chemical Kinetics}, institution = it, department = tdb, year = 2006, number = {2006-001}, month = jan, abstract = {A numerical method is developed for simulation of stochastic chemical reactions. The system is modeled by the Fokker-Planck equation for the probability density of the molecular state. 
The dimension of the domain of the equation is reduced by assuming that most of the molecular species have a normal distribution with a small variance. The numerical approximation preserves properties of the analytical solution such as non-negativity and constant total probability. The method is applied to a nine dimensional problem modelling an oscillating molecular clock. The oscillations stop at a fixed point with a macroscopic model but they continue with our two dimensional, mixed macroscopic and mesoscopic model.} } @TechReport{ it:2005-044, author = {Paul Sj{\"o}berg and Per L{\"o}tstedt and Johan Elf}, title = {{F}okker-{P}lanck Approximation of the Master Equation in Molecular Biology}, institution = it, department = tdb, year = 2005, number = {2005-044}, month = dec, abstract = {The master equation of chemical reactions is solved by first approximating it by the Fokker-Planck equation. Then this equation is discretized in the state space and time by a finite volume method. The difference between the solution of the master equation and the discretized Fokker-Planck equation is analyzed. The solution of the Fokker-Planck equation is compared to the solution of the master equation obtained with Gillespie's Stochastic Simulation Algorithm (SSA) for problems of interest in the regulation of cell processes. The time dependent and steady state solutions are computed and for equal accuracy in the solutions, the Fokker-Planck approach is more efficient than SSA for low dimensional problems and high accuracy. } } @TechReport{ it:2005-043, author = {Krister {\AA}hlander}, title = {Sparse Generalized {F}ourier Transforms}, institution = it, department = tdb, year = 2005, number = {2005-043}, month = dec, abstract = {Block-diagonalization of sparse equivariant discretization matrices is studied. Such matrices typically arise when partial differential equations that evolve in symmetric geometries are discretized via the finite element method or via finite differences. 
By considering sparse equivariant matrices as equivariant graphs, we identify a condition for when block-diagonalization via a sparse variant of a generalized Fourier transform (GFT) becomes particularly simple and fast. Characterizations for finite element triangulations of a symmetric domain are given, and formulas for assembling the block-diagonalized matrix directly are presented. It is emphasized that the GFT preserves symmetric (Hermitian) properties of an equivariant matrix. By simulating the heat equation at the surface of a sphere discretized by an icosahedral grid, it is demonstrated that the block-diagonalization pays off. The gain is significant for a direct method, and modest for an iterative method. A comparison with a block-diagonalization approach based upon the continuous formulation is made. It is argued that the sparse GFT method is an appropriate way to discretize the resulting continuous subsystems, since the spectrum and the symmetry are preserved.} } @TechReport{ it:2005-042, author = {Olof Sivertsson}, title = {Construction of Synthetic {CDO} Squared}, institution = it, department = csd, year = 2005, number = {2005-042}, month = dec, note = {M.Sc. thesis}, abstract = {We present techniques used in the implementation of an efficient constraint program for the portfolio optimization (PO) problem. This important combinatorial problem in the credit derivatives market arises for example when constructing synthetic collateralized debt obligations (CDOs) squared. A close relationship with the balanced incomplete block design (BIBD) problem exists which we make use of. Due to the large size of typical PO instances, global solving is not possible, instead we embed and solve sub-instances. The high quality of our approximate solutions can be assessed by comparison with a tight lower bound on the cost. 
Together with detection of BIBDs, symmetry breaking, extended reuse of already solved instances, and existence-checking during search, the performance of the program becomes good enough for constructing optimal portfolios of CDOs squared, with sizes common in the credit derivatives market, within minutes or seconds.} } @TechReport{ it:2005-041, author = {H{\aa}kan Zeffer and Zoran Radovic and Martin Karlsson and Erik Hagersten}, title = {{TMA}: A Trap-Based Memory Architecture}, institution = it, department = docs, year = 2005, number = {2005-041}, month = dec, note = {Revised version of Technical Report 2005-015}, abstract = {The advances in semiconductor technology have set the shared-memory server trend towards processors with multiple cores per die and multiple threads per core. We believe that this technology shift forces a reevaluation of how to interconnect multiple such chips to form larger systems. This paper argues that by adding support for \textit{coherence traps} in future chip multiprocessors, large-scale server systems can be formed at a much lower cost. This is due to shorter design time, verification and time to market when compared to its traditional all-hardware counterpart. In the proposed \textit{trap-based memory architecture} (TMA), software trap handlers are responsible for obtaining read/write permission, whereas the coherence trap hardware is responsible for the actual permission check. In this paper we evaluate a TMA implementation (called \textit{TMA Lite}) with a minimal amount of hardware extensions, all contained within the processor. The proposed mechanisms for coherence trap processing should not affect the critical path and have a negligible cost in terms of area and power for most processor designs. Our evaluation is based on detailed full system simulation using out-of-order processors with one or two dual-threaded cores per die as processing nodes. 
The results show that a TMA based distributed shared memory system can on average perform within 1 percent of a highly optimized hardware based design.} } @TechReport{ it:2005-040, author = {Martin Karlsson}, title = {Conserving Memory Bandwidth in Chip Multi-Processors with Runahead Execution}, institution = it, department = docs, year = 2005, number = {2005-040}, month = dec, abstract = {The introduction of chip multiprocessors (CMPs) presents new challenges and trade-offs to computer architects. Architects must now strike a balance between the number of cores per chip versus the amount of on-chip cache and available pin bandwidth. Technology projections predict that the cost of pin bandwidth will increase significantly and may therefore limit the number of processor cores per CMP. We observe a trend in many processor designs towards larger cache blocks for the highest level on-chip cache. A large cache block size is beneficial for workloads with a high amount of spatial locality. Our study confirms previous observations finding that significant parts of medium-sized cache blocks that are brought on-chip often remain unused and therefore wastefully consume pin bandwidth, especially for the commercial workloads studied. In this paper we target this waste by proposing a method of fine-grained fetches. In this paper we show that due to characteristics of runahead execution it is possible to remove the implicit assumption that programs exhibit abundant spatial locality, with a limited performance impact. We demonstrate, using execution-driven full system simulation, that our method of fine-grained fetching can obtain significant performance speedups in bandwidth constrained systems but also yield stable performance in systems that are not bandwidth limited. 
} } @TechReport{ it:2005-039, author = {Erik Nordstr{\"o}m and Richard Gold and Per Gunningberg}, title = {Mythbusters: Whatever You Thought About {MANET} Routing, Think Again...}, institution = it, department = docs, year = 2005, number = {2005-039}, month = nov, abstract = {Protocol and system designs suffer from myths that may affect their nature and purpose as well as their features and capabilities. We investigate the myths surrounding mobile ad hoc networks (MANETs) and the impact of these myths on the ability to build robust, efficient and simple routing protocols. We find that myths arise because of complexity and ambiguous design and lead to confusing protocol specifications, making the protocols difficult to understand and implement in running systems. For example, both AODV and DSR are confused about their nature as routing protocols and both blur the distinction between routing and forwarding. Building on experiences from implementing AODV and DSR, we dissect these protocols and try to dispel the myths surrounding them. From this dissection, we describe a new routing protocol called Partial Link State routing (PLS). PLS is a synthesis of routing concepts from AODV and DSR with a clearer design description and decoupling of routing and forwarding.} } @TechReport{ it:2005-038, author = {GianLuigi Ferrari and Ugo Montanari and Emilio Tuosto and Bj{\"o}rn Victor and Kidane Yemane}, title = {Modelling Fusion Calculus using HD-Automata}, institution = it, year = 2005, number = {2005-038}, month = nov, note = {A shorter version appeared in the proceedings of CALCO 2005}, abstract = {We propose a coalgebraic model of the Fusion calculus based on HD-automata. The main advantage of the approach is that the partition refinement algorithm designed for HD-automata is easily adapted to handle Fusion calculus processes. Hence, the transition systems of Fusion calculus processes can be minimised according to the notion of observational semantics of the calculus. 
As a beneficial side effect, this also provides a bisimulation checker for Fusion calculus. } } @TechReport{ it:2005-037, author = {Marino Miculan and Emilio Tuosto and Kidane Yemane}, title = {{HD}-Automata for Open Bisimulation}, institution = it, department = docs, year = 2005, number = {2005-037}, month = nov, abstract = {HD-automata are a syntax-independent operational model introduced for dealing with history-dependent formalisms. This kind of enriched automata, where states, transitions, and labels are equipped with \emph{names} and \emph{symmetries}, have been successfully applied for modelling early and late bisimulation in $\pi$-calculus and hyperbisimulation in Fusion calculus. However, current HD-automata are not adequate for modelling open bisimulation, because in HD-automata two names cannot be unified, while open bisimulation is closed under all possible name substitution respecting name distinctions. In this paper we tackle the problem by integrating in the definition of \emph{named sets}, the basic building blocks of HD-automata, a notion of \emph{distinction:} names can coalesce if the distinction allows to. Then, we use HD-automata over named sets with distinctions for modelling the open bisimulation of $\pi$-calculus. Finally, we discuss the relationship between named sets with distinctions and their HD-automata, with the categorical counterparts based on presheaf categories.} } @TechReport{ it:2005-036, author = {Mahen Jayawardena and Kajsa Ljungberg and Sverker Holmgren}, title = {Using Parallel Computing and Grid Systems for Genetic Mapping of Multifactorial Traits}, institution = it, department = tdb, year = 2005, number = {2005-036}, month = dec, abstract = {We present a flexible parallel implementation of the exhaustive grid search algorithm for multidimensional QTL mapping problems. 
A generic, parallel algorithm is presented and a two-level scheme is introduced for partitioning the work corresponding to the independent computational tasks in the algorithm. At the outer level, a static block-cyclic partitioning is used, and at the inner level a dynamic pool-of-tasks model is used. The implementation of the parallelism at the outer level is performed using scripts, while MPI is used at the inner level. By comparing results from the SweGrid system to those obtained using a shared memory server, we show that this type of application is highly suitable for execution in a grid framework.} } @TechReport{ it:2005-035, author = {Kajsa Ljungberg and Kateryna Mishchenko and Sverker Holmgren}, title = {Efficient Algorithms for Multi-Dimensional Global Optimization in Genetic Mapping of Complex Traits}, institution = it, department = tdb, year = 2005, number = {2005-035}, month = nov, abstract = {We present a two-phase strategy for optimizing a multi-dimensional, non-convex function arising during genetic mapping of quantitative traits. Such traits are believed to be affected by multiple so called QTL, and searching for d QTL results in a d-dimensional optimization problem with a large number of local optima. We combine the global algorithm DIRECT of Jones et al. with a number of local optimization methods that accelerate the final convergence, and adapt the algorithms to problem-specific features. We also improve the evaluation of the QTL mapping objective function to enable exploitation of the smoothness properties of the optimization landscape. Our best two-phase method is demonstrated to be accurate in at least six dimensions and up to ten times faster than currently used QTL mapping algorithms.} } @TechReport{ it:2005-034, author = {Maria Eriksson}, title = {Detecting Symmetries in Relational Models of {CSP}s}, institution = it, department = csd, year = 2005, number = {2005-034}, month = nov, note = {M.Sc.
thesis}, abstract = {This master's thesis studies symmetry detection in constraint satisfaction problems (CSPs). After modelling some well-known CSPs in a relational language, the symmetries of the single constraints in the models are studied. These results are then transformed into general patterns for symmetry detection. When the symmetries of the single constraints have been detected, symmetries for complete CSPs, with one or more constraints, are derived compositionally. This work deals with value as well as variable interchangeability.} } @TechReport{ it:2005-033, author = {Kajsa Ljungberg}, title = {Efficient Evaluation of the Residual Sum of Squares for Quantitative Trait Locus Models in the Case of Complete Marker Genotype Information}, institution = it, department = tdb, year = 2005, number = {2005-033}, month = nov, abstract = {\textbf{Motivation:} A core computation of many popular quantitative trait locus, QTL, mapping methods is determining the residual sum of squares, RSS, for a regression of trait values on (pseudo-)marker genotypes. A single evaluation is easily performed using the standard method QR factorization, but together the RSS computations take considerable time and often constitute the major part of the computational effort. \textbf{Results:} We present an algorithm for RSS evaluation that is mathematically equivalent to evaluation via QR factorization but 10-100 times faster depending on the model and data dimensions. It can be used for all standard QTL models. Our method opens the possibility for more detailed data analysis and more extensive model comparisons. \textbf{Availability:} C code, detailed derivations and general implementation strategies are available from the author on request. 
\textbf{Contact:} kajsa.ljungberg@it.uu.se} } @TechReport{ it:2005-032, author = {Johansson, Niklas and Sandblad, Bengt}, title = {Efficient {IT} support in care for the elderly - Experiences from the {VIHO} project}, institution = it, department = hci, year = 2005, number = {2005-032}, month = nov, abstract = {The main research objective for the VIHO project (Efficient Computer Support in Care for the Elderly) was to investigate how a home care and help service organization can be developed in order to be better prepared for future challenges, and how new technical systems could support the development process. We have studied the home help personnel's need for support and investigated how the new organization could be formed in order to provide a healthy and sustainable work. Initial focus has been on the essential parts of the work and how the work could be improved in the future, and not on design of the technical support systems. Our basic point of view has been that correctly used, new technology can contribute, so that work and organisation develops in a positive way, patients are feeling secure and the personnel's work environment is improved. This means that the organisation better can fulfil expectations and requirements. The professions can be strengthened and the organisation will be able to meet future challenges. In this report we briefly describe the results and the methods used in the project.} } @TechReport{ it:2005-031, author = {Jan Nordstr{\"o}m and R.C. Swanson}, title = {Boundary Conditions for a Divergence Free Velocity-Pressure Formulation of the Incompressible {N}avier-{S}tokes Equations}, institution = it, department = tdb, year = 2005, number = {2005-031}, month = nov, day = 9, abstract = {New sets of boundary conditions for the velocity-pressure formulation of the incompressible Navier-Stokes equations are derived.
The boundary conditions have the same form on both inflow and outflow boundaries and lead to a divergence free solution.} } @TechReport{ it:2005-030, author = {Erik B{\"a}ngtsson}, title = {A Consistent Stabilized Formulation for a Nonsymmetric Saddle-Point Problem}, institution = it, department = tdb, year = 2005, number = {2005-030}, month = oct, abstract = {In this report we study the stability of a nonsymmetric saddle-point problem. It is discretized with equal order finite elements and stabilized with a consistent regularization. In this way we achieve a stable finite element discretization of optimal order approximation properties.} } @TechReport{ it:2005-029, author = {Niklas Johansson and Stina Nylander}, title = {Usability in Mobile {IT} systems}, institution = it, department = hci, year = 2005, number = {2005-029}, month = oct, abstract = {In this report we give an overview of usability aspects applicable to mobile devices and systems. A number of research projects from three different application areas are presented and experiences from the projects are discussed. To successfully design usable products, services and systems both for leisure and for mobile work practice has turned out to be a difficult undertaking. Many systems fail because of a number of reasons. Some systems do not fail, but remains difficult and cumbersome to use. A certain immaturity can be observed since developers and designers do not fully utilise the benefits and assets provided by today's technology in design of mobile systems. For mobile systems, the varying contexts of use become more important. 
When only relying on existing knowledge of design for stationary systems, important possibilities are often lost and the system has gone astray.} } @TechReport{ it:2005-028, author = {Erik Berg and H{\aa}kan Zeffer and Erik Hagersten}, title = {A Statistical Multiprocessor Cache Model}, institution = it, department = docs, year = 2005, number = {2005-028}, month = oct, abstract = {The introduction of general purpose microprocessors running multiple threads will put a focus on methods and tools helping a programmer to write efficient parallel applications. Such a tool should be fast enough to meet a software developer's need for short turn-around time, but also accurate and flexible enough to provide trend-correct and intuitive feedback. This paper describes an efficient and flexible approach for modeling the memory system of a multiprocessor, such as those of chip multiprocessors (CMPs). Sparse data is sampled during a multithreaded execution. The data collected consist of the reuse distance and invalidation distribution for a small subset of the memory accesses. Based on the sampled data from a single run, a new mathematical formula can be used to estimate the miss rate for a memory hierarchy built from caches of arbitrary size, cacheline size and degree of sharing. The formula further divides the misses into six categories to further aid the software developer. The method is evaluated using a large number of commercial and technical multithreaded applications. The result produced by our algorithm fed with sparse sampling data is shown to be consistent with results gathered during traditional architecture simulation.} } @TechReport{ it:2005-027, author = {Timo Qvist}, title = {Fast Simulation of Concurrent Agents with P-Nets - the GPSim tool}, institution = it, department = docs, year = 2005, number = {2005-027}, month = sep, note = {M.Sc.
thesis}, abstract = {The aim of this thesis was to investigate and enlighten the applicability of P-Nets for simulating large and possibly infinite--control systems. P-Nets are generalized coloured Petri nets \cite{Petri62} which retain detailed information in the tokens regarding firing history and the scope of concurrently executing threads. P-Nets are specified with the help of a process calculi called CCS$_k$ which is a derivation of Milner's CCS \cite{milner89}, and converted to a P-Net representation prior to simulation. A simulation tool named GPSim was implemented using both generic optimization techniques and techniques specific to P-Net structure and semantics. The simulation performance of GPSim was evaluated and compared to two well--known formal verification tools with respectable pedigrees; Bell Labs' Spin and The Concurrency Workbench of the New Century from Stony Brook University.} } @TechReport{ it:2005-026, author = {Erik Abenius and Fredrik Edelvik and Christer Johansson}, title = {Waveguide Truncation Using {UPML} in the Finite-Element Time-Domain Method}, institution = it, department = tdb, year = 2005, number = {2005-026}, month = sep, abstract = {An important part of numerical waveguide modeling is the termination of the waveguide using artificial boundaries. In this paper we develop a perfectly matched layer (PML) for waveguides in the finite-element time-domain method (FETD). The PML is discretized by prism elements aligned with the direction of propagation of the waveguide. Assuming that the waveguide is discretized by tetrahedra such a grid is easily generated from a cross-sectional surface in the waveguide. The proposed method has the advantage of being general with regard to the geometry and material of the waveguide. Previous works on PML for FETD have reported problems with late-time instability. 
While still present in the current approach, our results indicate that the instability is less severe for the prism element PML compared to a tetrahedral PML. Moreover, it may be controlled by increasing the number of grid points in the absorbing layer. It should be noted that the instability appears long after the energy has been attenuated and therefore pose no problems in practical computations. The performance of the suggested scheme is demonstrated for several waveguide problems, including an inhomogeneous case.} } @TechReport{ it:2005-025, author = {Paul Pettersson and Wang Yi (eds.)}, title = {Pre-Proceedings of the 3rd International Conference on Formal Modelling and Analysis of Timed Systems}, institution = it, department = docs, year = 2005, number = {2005-025}, month = sep } @TechReport{ it:2005-024, author = {Torsten S{\"o}derstr{\"o}m and Erik K. Larsson and Kaushik Mahata and Magnus Mossberg}, title = {Approaches for Continuous-Time Modeling in Errors-in-Variables Identification}, institution = it, department = syscon, year = 2005, number = {2005-024}, month = sep, abstract = {Continuous-time identification is applied to an errors-in-variables setting. A continuous-time model is fitted to data consisting of discrete-time noise corrupted input and output measurements. The noise-free input is modelled as a continuous-time ARMA process. It is described how the Cram{\'e}r-Rao lower bound for the estimation problem can be computed. Several parameter estimation approaches for the problem are presented, and also illustrated in a short numerical study. } } @TechReport{ it:2005-023, author = {Per L{\"o}tstedt and Lars Ferm}, title = {Dimensional Reduction of the {F}okker-{P}lanck Equation for Stochastic Chemical Reactions}, institution = it, department = tdb, year = 2005, number = {2005-023}, month = aug, abstract = {The Fokker-Planck equation models chemical reactions on a mesoscale. 
The solution is a probability density function for the copy number of the different molecules. The number of dimensions of the problem can be large making numerical simulation of the reactions computationally intractable. The number of dimensions is reduced here by deriving partial differential equations for the first moments of some of the species and coupling them to a Fokker-Planck equation for the remaining species. With more simplifying assumptions, another system of equations is derived consisting of integro-differential equations and a Fokker-Planck equation. In this way, the simulation of the chemical networks is possible without the exponential growth in computational work and memory of the original equation and with better modelling accuracy than the macroscopic reaction rate equations. Some terms in the equations are small and are ignored. Conditions are given for the influence of these terms to be small on the equations and the solutions. The difference between different models is illustrated in a numerical example.} } @TechReport{ it:2005-022, author = {Torbj{\"o}rn Wigren}, title = {{MATLAB} Software for Recursive Identification and Scaling Using a Structured Nonlinear Black-box Model -- Revision 2}, institution = it, department = syscon, year = 2005, number = {2005-022}, month = aug, note = {Revised version of nr 2005-002. The software package can be downloaded from \url{http://www.it.uu.se/research/publications/reports/2005-022/NRISoftwareRev2.zip}} , abstract = {This report is intended as a user's manual for a package of MATLAB scripts and functions, developed for recursive prediction error identification of nonlinear state space systems and nonlinear static systems. The core of the package is an implementation of an output error identification and scaling algorithm. The algorithm is based on a continuous time, structured black box state space model of a nonlinear system.
An RPEM algorithm for recursive identification of nonlinear static systems, that re-uses the parameterization of the nonlinear ODE model, is added in the present revision of the software package. The software can only be run off-line, i.e. no true real time operation is possible. The algorithm is however implemented so that true on-line operation can be obtained by extraction of the main algorithmic loop. The user must then provide the real time environment. The software package contains scripts and functions that allow the user to either input live measurements or to generate test data by simulation. The scripts and functions for the setup and execution of the identification algorithms are somewhat more general than what is described in the references. There is e.g. support for automatic re-initiation of the algorithms using the parameters obtained at the end of a previous identification run. This allows for multiple runs through a set of data, something that is useful for data sets that are too short to allow convergence. The re-initiation step also allows the user to modify the degrees of the polynomial model structure and to specify terms that are to be excluded from the model. This makes it possible to iteratively refine the estimated model using multiple runs. The functionality for display of results includes scripts for plotting of data, parameters, prediction errors, eigenvalues and the condition number of the Hessian. The estimated model obtained at the end of a run can be simulated and the model output plotted, alone or together with the data used for identification. Model validation is supported by two methods apart from the display functionality. First, calculation of the RPEM loss function can be performed, using parameters obtained at the end of an identification run.
Secondly, the accuracy as a function of the output signal amplitude can be assessed.} } @TechReport{ it:2005-021, author = {Mei Hong and Torsten S{\"o}derstr{\"o}m and Wei Xing Zheng}, title = {Accuracy Analysis of Bias-Eliminating Least Squares Estimates for Identification of Errors-in-Variables Systems}, institution = it, department = syscon, year = 2005, number = {2005-021}, month = aug, abstract = {The bias-eliminating least squares (BELS) method is one of the consistent estimators for identifying dynamic errors-in-variables systems. The attraction of the BELS method lies in its good accuracy and its modest computational cost. In this report, we investigate the accuracy properties of the BELS estimates. It is shown that the estimated system parameters and the estimated noise variances are asymptotically Gaussian distributed. An explicit expression for the normalized covariance matrix of the estimated parameters is derived and supported by some numerical examples.} } @TechReport{ it:2005-020, author = {Stefan Engblom}, title = {Computing the Moments of High Dimensional Solutions of the Master Equation}, institution = it, department = tdb, year = 2005, number = {2005-020}, month = jun, day = 30, abstract = {Derived from the Markov character only, the master equation of chemical reactions is an accurate stochastic description of quite general systems in chemistry. Exact solutions of this equation are rare and the most frequently used approximative solution method is to write down the corresponding set of \emph{reaction rate} equations. In many cases this approximation is not valid, or only partially so, as stochastic effects caused by the natural noise present in the full description of the problem are poorly captured. In this paper it is shown how a certain set of higher order equations can be derived. 
It is shown by theory and example that stochastic effects are better captured using this technique while still maintaining the computational advantages of the reaction rate approach.} } @TechReport{ it:2005-019, author = {Torsten S{\"o}derstr{\"o}m}, title = {Computing the Covariance Matrix for {PEM} Estimates and the {C}ram{\'e}r-{R}ao Lower Bound for Linear State Space Models}, institution = it, department = syscon, year = 2005, number = {2005-019}, month = jun, abstract = {The paper presents a complete and comprehensive algorithm for computing the asymptotic accuracy of estimated state space models. The parameterization is assumed to give a uniquely identifiable system, but is otherwise general. It is assumed that the system matrices and the noise characteristics are smooth functions of the unknown parameters. Expressions for the asymptotic covariance matrix of the parameter estimates are derived for some variants of the prediction error method. As a special case for Gaussian distributed data, the Cram{\'e}r-Rao bound and the covariance matrix for maximum likelihood estimates are obtained.} } @TechReport{ it:2005-018, author = {Eva Olsson and Niklas Johansson and Jan Gulliksen and Bengt Sandblad}, title = {A Participatory Process Supporting Design of Future Work}, institution = it, department = hci, year = 2005, number = {2005-018}, month = may, day = 30, abstract = {This paper illustrates how a participatory design process has developed as a result of empirical studies with workers. The view on design as a process presented here necessitates an explicit analysis of work where both workers and designers participate. The analysis gives workers a possibility to reflect on present and future work practices and to evaluate effects in advance. Moreover, it provides designers insights that facilitate design of support systems that match work complexity.
It has often been argued that the full potential of a new IT-support system is not reached, but that only old work procedures are preserved. It is one of the purposes of our work re-design process to support creative changes of different nature. To ascertain that the complexity of work is acknowledged and that work redesign is enabled in the design process, we recommend that workers participate to a much higher degree than present. During a number of projects this process has successively developed. In this paper, the process is described, together with a selection of cases where it has been applied. Important experiences are the need for extensive user participation in the design process and the time and resources the process must be given.} } @TechReport{ it:2005-017, author = {Tobias Bandh}, title = {Evaluation of Authentication Algorithms for Small Devices}, institution = it, department = docs, year = 2005, number = {2005-017}, month = may, abstract = {Today's small devices get more and more the possibility to communicate. This makes it more and more important to think about security problems. One part of secure communication is the authentication of the communication partner. The communication partners have to prove to each other that they are who they pretend to be. In this work we compared several authentication protocols to evaluate their suitability for usage on small devices. We chose four examples which cover the four main categories of authentication protocols: password-, public-key, secret-key, and zero-knowledge authentication. In a first step we analyzed them regarding the number of exchanged messages and their computational complexity. In a second step two of the protocols were implemented for a Nokia 6610 mobile phone with Java MIDlets as small device. The counterpart on the server side was realized using Java Servlets. We measured the times the protocols needed to accomplish their task. The results of those two steps were compared.
We concluded that the number of exchanged messages and in particular the communication latency had a more significant influence on the results than the computational complexity of the protocols.} } @TechReport{ it:2005-016, author = {H{\aa}kan Zeffer and Erik Hagersten}, title = {Adaptive Coherence Batching for Trap-Based Memory Architectures}, institution = it, department = docs, year = 2005, number = {2005-016}, month = may, abstract = {Both software-initiated and hardware-initiated prefetching have been used to accelerate shared-memory server performance. While software-initiated prefetching require instruction set and compiler support, hardware prefetching often require additional hardware structures or extra memory state. The \textit{coherence batching} scheme proposed in this paper keeps the system completely binary transparent and does not rely on any additional hardware. Hence, it can be implemented without additional hardware in software coherent systems and improve performance for already optimized and compiled binaries. We have evaluated our proposals on a \textit{trap-based memory architecture} where fine-grained coherence permission checks are done in hardware but the coherence protocol is run in software on the requesting processor. Functional full-system simulation shows that our software-only coherence-batch scheme is able to reduce the number of coherence misses with up to 60 percent compared to a system without coherence batching. 
The average miss reduction is 37 percent while the average bandwidth usage is reduced.} } @TechReport{ it:2005-015, author = {H{\aa}kan Zeffer and Zoran Radovic and Martin Karlsson and Erik Hagersten}, title = {{TMA}: A Trap-Based Memory Architecture}, institution = it, department = docs, year = 2005, number = {2005-015}, month = may, note = {A revised version is available as Technical Report 2005-041}, abstract = {The advances in semiconductor technology have set the shared-memory server trend towards multiple cores per die and multiple threads per core. We believe that this technology shift forces a reevaluation of how to interconnect multiple such chips to form larger systems. This paper argues that minimal processor support for coherence traps implemented in future chip multiprocessors will provide large-scale server systems at a much lower cost in terms of engineer years, verification and time to market when compared to its traditional all-hardware counter part. In our proposal, software trap handlers are responsible for getting read/write permission, whereas the coherence trap hardware is responsible for the actual permission check. Detailed full-system simulation shows that a coherence-trap enabled distributed shared memory system can be performance competitive with its highly optimized hardware-only counter part. The evaluated systems use high-end processors with one or two dual-threaded cores per die as processing nodes.} } @TechReport{ it:2005-014, author = {Richard Gold and Mats Uddenfeldt}, title = {Daigan: Constructing Proxy Networks with {S}el{N}et}, institution = it, department = docs, year = 2005, number = {2005-014}, month = apr, abstract = {We present Daigan, a tool for constructing distributed proxy networks using the SelNet indirection layer. Using the indirection capabilities of SelNet, we can construct on-demand chains of proxies for adapting content for mobile devices. 
Daigan takes a specification of a device's capabilities and from that identifies which proxies are needed to meet these requirements. Based on this specification Daigan sets up a chain of proxies and transparently redirects an application's traffic to the proxy chain. We demonstrate Daigan's abilities through a scenario which performs the instantiation of a proxy chain consisting of a proxy cache, an image transcoder/compressor and a TCP header compression proxy. We describe this scenario in detail and report on implementation progress.} } @TechReport{ it:2005-013, author = {H{\aa}kan Zeffer and Zoran Radovic and Erik Hagersten}, title = {Flexibility Implies Performance}, institution = it, department = docs, year = 2005, number = {2005-013}, month = apr, abstract = {No single coherence strategy suits all applications well. Many promising adaptive protocols and coherence predictors, capable of dynamically modifying the coherence strategy, have been suggested over the years. While most dynamic detection schemes rely on plentiful of dedicated hardware, the customization technique suggested in this paper requires no extra hardware support for its per-application coherence strategy. Instead, each application is profiled using a low-overhead profiling tool. The appropriate coherence flag setting, suggested by the profiling, is specified when the application is launched. We have compared the performance of a hardware DSM (Sun WildFire) to a software DSM built with identical interconnect hardware and coherence strategy. With no support for flexibility, the software DSM runs on average 45 percent slower than the hardware DSM on the 12 studied applications, while the flexibility can get the software DSM within 11 percent. 
Our all-software system outperforms the hardware DSM on four applications.} } @TechReport{ it:2005-012, author = {Milena Ivanova and Tore Risch}, title = {Customizable Parallel Execution of Scientific Stream Queries}, institution = it, department = csd, year = 2005, number = {2005-012}, month = apr, abstract = {Scientific applications require processing high-volume on-line streams of numerical data from instruments and simulations. We present an extensible stream database system that allows scalable and flexible continuous queries on such streams. Application dependent streams and query functions are defined through an Object-Relational model. Distributed execution plans for continuous queries are described as high-level data flow distribution templates. Using a generic template we define two partitioning strategies for scalable parallel execution of expensive stream queries: window split and window distribute. Window split provides operators for customized parallel execution of query functions whose complexity depends on size of the data units on which they are applied. It reduces the size of stream data units using application dependent functions as parameters. By contrast, window distribute provides operators for customized distribution of entire data units without reducing their size. We evaluated these strategies for a typical high volume scientific stream application and show that window split is favorable when computational resources are limited, while window distribute is better when there are sufficient resources.} } @TechReport{ it:2005-011, author = {Per Carlsson and Arne Andersson}, title = {A Flexible Model for Tree-Structured Multi-Commodity Markets}, institution = it, department = csd, year = 2005, number = {2005-011}, month = apr, note = {Extended version of conference paper accepted for IEEE CEC2005, M{\"u}nchen, July 2005}, abstract = {In this paper we study tree-structured multi-commodity, multi-unit markets. 
The concept is a way to handle dependencies between commodities on the market in a tractable way. The winner determination problem of a general combinatorial market is well known to be NP-hard. It has been shown that on single-unit single-sided combinatorial auctions with tree-structured bundles the problem can be computed in polynomial time. We show that it is possible to extend this to multi-unit double-sided markets. Further it is possible to handle the commodities of a bundle not only as complements but as perfect substitutes too. Under certain conditions the computation time is still polynomial.} } @TechReport{ it:2005-010, author = {P{\"a}r Samuelsson and Bj{\"o}rn Halvarsson and Bengt Carlsson}, title = {Cost-Efficient Operation of a Denitrifying Activated Sludge Process - An Initial Study}, institution = it, department = syscon, year = 2005, number = {2005-010}, month = apr, abstract = {In this paper, possible choices of optimal set-points and cost minimizing control strategies for the denitrification process in an activated sludge process are discussed. In order to compare different criterion functions, simulations utilizing the European COST benchmark are considered. By means of operational maps the results are visualized. It is found that there is a clear set-point area where the process can be said to be efficiently controlled in an economic sense. For most reasonable operating points this optimal area corresponds to a nitrate concentration in the anoxic compartment in the interval 1-3 mg(N)/l. Furthermore, the location of this optimum does not seem to be very sensitive to changes in the ASM1 parameters. With an appropriate nitrate cost function, the legislatory authorities can place this economic optimum in an area where also the effluent regulations are met. 
It is also discussed how this efficient control can be accomplished.} } @TechReport{ it:2005-009, author = {Claes Olsson}, title = {Disturbance Observer-Based Automotive Engine Vibration Isolation Dealing with Non-Linear Dynamics and Transient Excitation}, institution = it, department = syscon, year = 2005, number = {2005-009}, month = apr, abstract = {Active automotive engine vibration isolation is considered where both stationary and transient engine-induced excitations as well as plant non-linearity are considered. The adopted control strategy targets the dominating spectral components of the excitation and achieves narrow band vibration isolation using feedback of disturbance states estimates. Time-varying gain-scheduled observer design, including investigations of closed-loop characteristics, is based on a linear parameter varying (LPV) approximation of the considered non-linear engine and subframe suspension system. To generate this representation, an approach of dividing the non-linear system into its linear and non-linear components where the latter is represented using a parameter dependent non-linear function is proposed. Parameter dependent quadratic stability analysis has been made tractable using an affine closed-loop system representation. High vibration isolation performance is demonstrated using co-simulations incorporating a detailed non-linear plant model and measured engine excitations. This is also achieved for engine operating conditions corresponding to rapid car accelerations, whereas the system exhibits non-linear characteristics and the fundamental frequency of the harmonic disturbance undergoes rapid time variations. Parameter dependent closed-loop quadratic stability is being shown assuming plant linearity. 
Yet, in the non-linear plant case, stability is guaranteed but only for limited intervals of the parameters and their time derivatives.} } @TechReport{ it:2005-008, author = {Linda Brus}, title = {Nonlinear Identification of a Solar Heating System}, institution = it, department = syscon, year = 2005, number = {2005-008}, month = mar, abstract = {The use of solar heating systems is a way of exploiting the clean and free energy from the sun. To optimize the energy gain from such a system, where the main input, the solar insolation, is an uncontrollable variable, good models of the system dynamics are required. Identification methods are often either highly specialized for the application or require an extensive amount of data, especially when the dynamics studied are nonlinear. This paper shows that by application of a new recursive system identification technique, a small scale solar heating system can be modeled with very little data, without having to tailor the model structure to the application.} } @TechReport{ it:2005-007, author = {Linda Brus}, title = {Nonlinear Identification of an Anaerobic Digestion Process}, institution = it, department = syscon, year = 2005, number = {2005-007}, month = mar, abstract = {Anaerobic digestion in bioreactors is an important technology for environmental friendly treatment of organic waste. To optimize and control such processes accurate dynamic models of the process are needed. Unfortunately, modeling of anaerobic digestion often results in high order nonlinear models with many unknown parameters, a fact that complicates controller design. This paper attempts to circumvent this problem, by application of new recursive system identification techniques, thereby radically reducing the degree of the models and the number of parameters. 
Experiments show that a second order nonlinear model is sufficient for accurate modeling of the system.} } @TechReport{ it:2005-006, author = {Agnes Runqvist and Magnus Mossberg and Torsten S{\"o}derstr{\"o}m}, title = {On Optimal Sensor Locations for Nonparametric Identification of Viscoelastic Materials}, institution = it, department = syscon, year = 2005, number = {2005-006}, month = feb, abstract = {The problem of optimal sensor locations in nonparametric identification of viscoelastic materials is considered. Different criteria of the covariance matrix, connected to A- and D-optimal experiment design, are considered and evaluated. The results of the paper can be used to design experiments with improved accuracy of the estimates.} } @TechReport{ it:2005-005, author = {Torsten S{\"o}derstr{\"o}m}, title = {Accuracy Analysis of the {F}risch Scheme for Identifying Errors-in-Variables Systems}, institution = it, department = syscon, year = 2005, number = {2005-005}, month = feb, abstract = {Several estimation methods have been proposed for identifying errors-in-variables systems, where both input and output measurements are corrupted by noise. One of the promising approaches is the so called Frisch scheme. This paper provides an accuracy analysis of the Frisch scheme applied to system identification. The estimates of the system parameters and the noise variances are shown to be asymptotically Gaussian distributed. An explicit expression for the covariance matrix of the asymptotic distribution is given as well. Numerical simulations support the theoretical results. 
A comparison with the Cram{\'e}r-Rao lower bound is also given in examples, and it is shown that the Frisch scheme gives a performance close to the Cram{\'e}r-Rao bound for large signal-to-noise ratios.
The technical new idea consists of encoding data as table entries rather than active processes, and using a firewalled central integrity manager to ensure data security.} } @TechReport{ it:2005-003, author = {Claes Olsson}, title = {Structure Flexibility Impacts on Robust Active Vibration Isolation Using Mixed Sensitivity Optimisation}, institution = it, department = syscon, year = 2005, number = {2005-003}, month = feb, abstract = {Active vibration isolation from an arbitrarily, structurally complex receiver is considered with respect to the impacts of structure flexibility on the open- and closed-loop system characteristics. Specifically, the generally weak influence of flexibility on the open-loop transfer function in case of total force feedback, in contrast to acceleration feedback, is investigated. The open-loop system characteristics are analysed based on open-loop transfer function expressions obtained using modal expansion and on modal model order reduction techniques. To closely demonstrate and illustrate the impacts of flexibility on the closed-loop system performance and stability, a problem of automotive engine vibration isolation from a flexible subframe is presented where the neglected dynamics are represented as an output multiplicative model perturbation. A physical explanation to why the contribution of flexibility to the open-loop transfer function could be neglected in the case of total force feedback in contrast to acceleration feedback is given. Factors for an individual eigenmode to not significantly contribute to the total force output are presented where the deviation of the mode direction relative to the actuator force direction is pointed out as a key one in addition to modal mass and damping coefficient. In this context, the inherent differences between model order reduction by modal and by balanced truncation are being stressed. 
For the specific automotive vibration isolation application considered, the degradation of robust performance and stability is shown to be insignificant when obtaining a low order controller by using total force feedback and neglecting flexibility in the design phase.} } @TechReport{ it:2005-002, author = {Torbj{\"o}rn Wigren}, title = {{MATLAB} Software for Recursive Identification and Scaling Using a Structured Nonlinear Black-box Model --- Revision 1}, institution = it, department = syscon, year = 2005, number = {2005-002}, month = jan, note = {The software package can be downloaded from \url{http://www.it.uu.se/research/publications/reports/2005-002/NRISoftware.zip}} , abstract = {This reports is intended as a users manual for a package of MATLAB scripts and functions, developed for recursive prediction error identification of nonlinear state space systems. The core of the package is an implementation of an output error identification and scaling algorithm. The algorithm is based on a continuous time, structured black box state space model of a nonlinear system. The software can only be run off-line, i.e. no true real time operation is possible. The algorithm is however implemented so that true on-line operation can be obtained by extraction of the main algorithmic loop. The user must then provide the real time environment. The software package contains scripts and functions that allow the user to either input live measurements or to generate test data by simulation. The scripts and functions for the setup and execution of the identification algorithm are somewhat more general than what is described in the references. There is e.g. support for automatic re-initiation of the algorithm using the parameters obtained at the end of a previous identification run. 
This allows for multiple runs through a set of data, something that is useful for data sets that are too short to allow convergence of the algorithm. The re-initiation step also allows the user to modify the degrees of the polynomial model structure and to specify terms that are to be excluded from the model. This makes it possible to iteratively refine the estimated model using multiple runs.
We present analysis showing that if $P$ is a first order differential operator, $KP$ is bounded, and numerical results show grid independent convergence for first order partial differential equations, using fixed point iterations. For the second order convection-diffusion equation convergence is no longer grid independent when using fixed point iterations, a result that is consistent with our theory. However, if the grid is chosen to give a fixed number of grid points within boundary layers, the number of iterations is independent of the physical viscosity parameter. Also, if GMRES is used together with the proposed preconditioner, the numbers of iterations decrease as the grid is refined, also for fixed viscosity.} } @TechReport{ it:2004-058, author = {Anna Eckerdal}, title = {On the Understanding of {O}bject and {C}lass}, institution = it, department = tdb, year = 2004, number = {2004-058}, month = dec, abstract = {This paper presents the results from a phenomenographic study of first year university students' understandings of the concepts \emph{object} and \emph{class}. The students had just finished their first programming course using Java as the programming language. The analyses of the study display qualitatively different ways to understand the concepts. These aspects of the understandings are formulated as \emph{categories of description}. Each aspect is important and relevant when solving a programming task, and a good understanding includes all aspects of the concepts. The categories show understandings of different complexity. There are students who can only express an understanding of the concepts as code and syntax rules. Other students can also express the role the objects play for the programmer and the performance of the task given, and classes as abstract data types. The students who express the richest understanding includes the understandings already mentioned, but they can also express that classes and objects depicts the real world. 
Learning to understand a phenomenon means, in a phenomenographic perspective, to \emph{discern} new aspects of that phenomenon. This discernment is only possible if there is a \emph{variation} in a dimension that corresponds to the aspect, critical for the specific understanding. The question of variation gives interesting implications for teaching discussed in the paper. The discernment requires the students to have a mindful kind of learning, \emph{reflective learning}. This implies for the teachers that \emph{explicitness} in the teaching is of great importance. Explicitness in the teaching is defined at several levels, not only that the different aspects of the understandings found in the study should explicitly be mentioned, but also explicitness in the explanation and variation of these aspects. The importance of offering a broad context for object oriented programming is also emphasized. This can include explaining not only the object oriented paradigm, but also to discuss some other programming paradigms, and the historical background that lead to these paradigms. It can also include giving the students the opportunity to follow a whole programming task with analysis of the problem, and to design, implement and test the program even at an early stage of their education. The paper also includes some examples how the results from the study can be implemented in the teaching to help the students in their learning process.} } @TechReport{ it:2004-057, author = {Erik Bor{\"a}lv}, title = {Evaluation and Reflections on the Design of the {WeAidU} System}, institution = it, department = hci, year = 2004, number = {2004-057}, month = dec, abstract = {The Web-Based Artificial Intelligence for Diagnostic Use (WeAidU) system is a support system providing near instantaneous professional aid in clinical decision making. 
The system involves neural networks trained to interpret myocardial perfusion scintigrams, and enhance the effectiveness of clinical decision making when it comes to coronary artery disease. The system is designed to work on the Internet in a client/server setup . The usability of the system is evaluated. The evaluation result is related to two previous evaluations. A link is made from evaluation results to the way the system is designed and developed. It is observed that some of the usability problems found are tied to the development process. Usability inspections locate a number of possible problem areas when it comes to the user interaction. At the same time the user's assessment of usability is satisfactory or even high. This is most likely explained by the novel nature of this computer support and a very committed user group. Clinical physiology is acknowledged to be a computerized field, where physician's acceptability and use of computers is fairly high. It is observed that the development team's, in this particular context, favored a view of usability as a quality of the product before the view of usability as integrated in the development process/product. A more integrated view of usability is suggested to remove usability errors and integration barriers, and to help the system better fit into the physician's daily routines.} } @TechReport{ it:2004-056, author = {Erik Bor{\"a}lv}, title = {Design and Evaluation of the {CHILI} System}, institution = it, department = hci, year = 2004, number = {2004-056}, month = dec, abstract = {The CHILI system is a general-purpose radiology workstation with teleradiology and telecardiology functions. The design of the CHILI user interface is made with the purpose to achieve high usability. Usability is treated as part of the product develop-ment process instead of being a separate activity. A number of design principles -- rationale -- guided the design of the user interface. 
The minimalism of the system comes from the approach to focus primarily on work tasks instead of functionality. This is accomplished by displaying as much information as possible in one single window. No functions are out of sight in menus or additional dialogue windows. An evaluation using questionnaires was performed to assess the usability of the system. The questionnaires include a validated usability measure (the System Usability Scale) to give a global assessment of system usability. Five experienced users were interviewed individually to complement the ques-tionnaire data. The design approach used in the CHILI system was found to be working, both from an experienced and new user's perspective. The strong sides of the system are the ease of use, the minimalism of the design, and the way the system is adapted to the working environment. Areas where it is possible to improve are the on-line help system, questions regarding available functionality, and user training. Continuous redesign and development has helped increase usability.} } @TechReport{ it:2004-055, author = {Per L{\"o}tstedt and Jonas Persson and von Sydow, Lina and Johan Tysk}, title = {Space-Time Adaptive Finite Difference Method for {E}uropean Multi-Asset Options}, institution = it, department = tdb, year = 2004, number = {2004-055}, month = dec, abstract = {The multi-dimensional Black-Scholes equation is solved numerically for a European call basket option using \emph{a priori}--\emph{a posteriori} error estimates. The equation is discretized by a finite difference method on a Cartesian grid. The grid is adjusted dynamically in space and time to satisfy a bound on the global error at the expiry date. The discretization errors in each time step are estimated and weighted by the solution of the adjoint problem. Bounds on the local errors and the adjoint solution are obtained by the maximum principle for parabolic equations. 
The performance of the method is illustrated by examples in one, two, three, and four dimensions.} } @TechReport{ it:2004-054, author = {Lars Ferm and Per L{\"o}tstedt and Paul Sj{\"o}berg}, title = {Adaptive, Conservative Solution of the {F}okker-{P}lanck Equation in Molecular Biology}, institution = it, department = tdb, year = 2004, number = {2004-054}, month = nov, abstract = {The Fokker-Planck equation on conservation form is discretized by a finite volume method and advanced in time by a linear multistep method. The grid cells are refined and coarsened in blocks of the grid depending on an estimate of the spatial discretization error and the time step is chosen to satisfy a tolerance on the temporal discretization error. The solution is conserved across the block boundaries so that the total probability is constant. A similar effect is achieved by rescaling the solution. The steady state solution is determined as the eigenvector corresponding to the zero eigenvalue. The method is applied to the solution of a problem with two molecular species and the simulation of a circadian clock. Comparison is made with a stochastic method.} } @TechReport{ it:2004-053, author = {Bharath Bhikkaji and Kaushik Mahata and Torsten S{\"o}derstr{\"o}m}, title = {Recursive Algorithms for Estimating the Parameters in a One Dimensional Heat Diffusion System: Analysis}, institution = it, department = syscon, year = 2004, number = {2004-053}, month = nov, abstract = {In [5], we have proposed two recursive algorithms in the frequency domain to estimate the parameters of a one dimensional heat diffusion system. There in, we have discussed in detail the construction and the implementation of the algorithms. Further in [5], we observed the convergence of the proposed algorithms using certain numerical examples. 
In this paper, we analyse the convergence of these algorithms from a theoretical perspective.} } @TechReport{ it:2004-052, author = {Bharath Bhikkaji and Torsten S{\"o}derstr{\"o}m and Kaushik Mahata}, title = {Recursive Algorithms for Estimating the Parameters in a One Dimensional Heat Diffusion System: Derivation and Implementation}, institution = it, department = syscon, year = 2004, number = {2004-052}, month = nov, abstract = {In this paper we consider a one-dimensional heat diffusion system, which is modeled by a partial differential equation (PDE) involving some unknown parameters. Here we estimate these unknown parameters recursively using a frequency domain approach.} } @TechReport{ it:2004-051, author = {Claes Olsson}, title = {Comparative Study of Recursive Parameter Estimation Algorithms with Application to Active Vibration Isolation}, institution = it, department = syscon, year = 2004, number = {2004-051}, month = nov, abstract = {In this paper, adaptive filtering is adopted for active automotive engine vibration isolation where both transient and stationary engine internal excitations as well as structure flexibility are considered. The adaptive filtering problem is formulated using a linear regression model representation. This allows for an application of a general family of state-of-the-art recursive parameter estimation algorithms. The performance of two specific members of this family has been compared. Those are the well-known normalised least mean square (NLMS) algorithm and a recently suggested Kalman filter based algorithm originally proposed as a method to avoid covariance windup, here referred to as Stenlund-Gustafsson (SG). A virtual non-linear 43 degrees of freedom engine and subframe suspension model and measurement based engine excitation are used in evaluation of algorithm performance. 
With respect to trade-off between convergence and steady-state variance, the difference Riccati equation included in SG implies superior performance of SG compared to NLMS. However, none of the proposed algorithms provide sufficient tracking performance to deal with transient engine excitation corresponding, for instance, to rapid acceleration of the car. In this case, the adaptive filtering strategy is found to be inadequate.} } @TechReport{ it:2004-050, author = {Stefan Johansson}, title = {High Order Summation by Parts Operator Based on a {DRP} Scheme Applied to {2D} Aeroacoustics}, institution = it, department = tdb, year = 2004, number = {2004-050}, month = oct, abstract = {A strictly stable high order finite difference method based on Tam and Webb's dispersion relation preserving scheme in the interior has been verified for a 2D aeroacoustic problem. Results show that the method gives lower dispersion error than a similar method derived by Strand, which is based on standard sixth order difference approximation in the interior, when boundary effects are not important.} } @TechReport{ it:2004-049, author = {Erik Bor{\"a}lv and Niklas Johansson and Emmanuel Papaioannou and Athanasios Demiris}, title = {A Design Case: Interactive Sports Content Broadcasting}, institution = it, department = hci, year = 2004, number = {2004-049}, month = oct, abstract = {Digital television is a new and interesting platform for developing multimedia services. The MELISA platform aims at cross-media broadcasting over digital television and 3G mobile networks. The platform provides presentation of interactive video content, advertisement and gaming. The devices targeted by these services are Set-Top Boxes and Portable Digital Assistants. A new service, in combination with technology that does not yet allow for very rich interaction, requires careful and inventive design of the user interfaces. The key to gaining consumer acceptance of interactive TV is ease of use. 
In this paper we present the system architecture briefly and discuss issues related to user interface design, considering both the type of media content and hardware platforms. We reflect on the design process used and its suitability as we as designers have experienced it.} } @TechReport{ it:2004-048, author = {Henrik L{\"o}f and Jarmo Rantakokko}, title = {Algorithmic Optimizations of a Conjugate Gradient Solver on Shared Memory Architectures}, institution = it, department = tdb, year = 2004, number = {2004-048}, month = oct, abstract = {OpenMP is an architecture-independent language for programming in the shared memory model. OpenMP is designed to be simple and powerful in terms of programming abstractions. Unfortunately, the architecture-independent abstractions sometimes come with the price of low parallel performance. This is especially true for applications with unstructured data access pattern running on distributed shared memory systems (DSM). Here proper data distribution and algorithmic optimizations play a vital role for performance. In this article we have investigated ways of improving the performance of an industrial class conjugate gradient (CG) solver, implemented in OpenMP running on two types of shared memory systems. We have evaluated bandwidth minimization, graph partitioning and reformulations of the original algorithm reducing global barriers. By a detailed analysis of barrier time and memory system performance we found that bandwidth minimization is the most important optimization reducing both L2 misses and remote memory accesses. On an uniform memory system we get perfect scaling. 
On a NUMA system the performance is significantly improved with the algorithmic optimizations leaving the system dependent global reduction operations as a bottleneck.} } @TechReport{ it:2004-047, author = {Iordanis Kavathatzopoulos and Jenny {\"O}hman Persson and Carl {\AA}borg}, title = {Assessing Health and Moral Stress in {IT}-Based Work}, institution = it, department = hci, year = 2004, number = {2004-047}, month = oct, abstract = {Health and Moral Stress Questionnaire (HMSQ) was constructed with the aim to assess organizational learning processes and individual skills necessary for the handling of problems connected to five areas of activity: work demands, work task control, support, computer tool use, and ethics. The main hypothesis was that the existence of organizational learning processes and individual skills contributes to lower levels of stress by providing the knowledge needed to solve the problems arising during IT-supported work. The results showed that it is possible to construct a questionnaire to assess organizational learning and personal skills as a factor defining stress level at IT-based work. High levels of reliability were obtained. In accordance to our hypothesis five factors could be discerned as well as an underlying factor representing learning processes and personal skills.} } @TechReport{ it:2004-046, author = {Per Carlsson}, title = {Market Simulations}, institution = it, department = csd, year = 2004, number = {2004-046}, month = oct, abstract = {Supply and demand for electricity show conciderable fluctuations with season and time of day. Novel market-based methods for supply -- demand matching are currently investigated in a European project. 
This report covers simulations under preparation as well as the outcome of a first simulation on the time-scale of day-ahead power markets.} } @TechReport{ it:2004-045, author = {Per Carlsson and Arne Andersson}, title = {A Flexible Model for Tree-Structured Multi-Commodity Markets}, institution = it, department = csd, year = 2004, number = {2004-045}, month = oct, abstract = {In this article we study tree-structured multi-commodity markets. The concept is a way to handle dependencies between commodities on the market in a tractable way. The winner determination problem of a general combinatorial market is well known to be NP-hard. It has been shown that on single-unit single-sided auctions with tree-structured bundles the problem can be computed in polynomial time. We show that it is possible to extend this to multi-unit double-sided markets. Further it is possible to handle the commodities of a bundle not only as complements but as perfect substitutes too. Under certain conditions the computation time is still polynomial.} } @TechReport{ it:2004-044, author = {Erik Cedheim and Ramzi Ferchichi and Anders Jonsson and Dan Lind and Henrik Nyman and Olof Sivertsson and Andreas Widenfalk and J{\"o}ns {\AA}kerlund and Leonid Mokrushin and Paul Pettersson}, title = {Kelb - A Real-Time Programming Environment for the {Sony} {Aibo}}, institution = it, department = docs, year = 2004, number = {2004-044}, month = oct, abstract = {Kelb is a new real-time programming environment developed at Uppsala University for the Sony AIBO ERS-210. It is aimed to provide efficiency by introducing a notion of light-weight tasks executing according to well-known real-time scheduling algorithms and resource protocols, while still allowing applications to be developed in a high-level abstract programming language. 
In this paper we give an overview of the design of Kelb and describe the status of the environment, currently including: a real-time programming language and compiler extending gcc for MIPS with support for time- and event-triggered tasks, a runtime library with support for static and dynamic preemptive scheduling algorithms (e.g. fixed priority and earliest deadline first), and a prototype connection to the Times tool allowing Kelb designs to be analysed for schedulabilty.} } @TechReport{ it:2004-043, author = {Iordanis Kavathatzopoulos and Jenny Persson and Carl {\AA}borg}, title = {Skattekontoret i Falun: Ett m{\"o}nsterkontor}, institution = it, department = hci, year = 2004, number = {2004-043}, month = oct, note = {In Swedish. Originally written 2002}, abstract = {Denna rapport {\"a}r en utv{\"a}rdering av {\aa}tg{\"a}rder som genomf{\"o}rdes p{\aa} skattemyndighetens kontor i Falun som en del av m{\"o}nsterkontorsprojektet. {\AA}tg{\"a}rdernas syfte var att tillf{\"o}ra individer och organisation f{\"a}rdighetsbaserat l{\"a}rande f{\"o}r att minska otrivsel, stress och oh{\"a}lsa. Avdelningen f{\"o}r m{\"a}nniska-datorinteraktion vid Uppsala universitet har deltagit som st{\"o}d i anstr{\"a}ngningarna f{\"o}r f{\"o}rb{\"a}ttring av arbetsmilj{\"o}n samt i utv{\"a}rderingen av genomf{\"o}rda {\aa}tg{\"a}rder som en del i VERKA-projektet. Utv{\"a}rderingen har baserats p{\aa} data som har samlats genom intervjuer, fr{\aa}geformul{\"a}r och testinstrument. Resultaten visar att st{\"a}mningen p{\aa} Falunkontoret {\"a}r b{\"a}ttre nu {\"a}n innan. De anst{\"a}llda har en klar {\"o}nskan att till{\"a}mpa det de l{\"a}rt sig samt att forts{\"a}tta med nya och kompletterande {\aa}tg{\"a}rder. 
De har ocks{\aa} ett stort behov att f{\"o}rb{\"a}ttra IT-st{\"o}det.} } @TechReport{ it:2004-042, author = {Per Sundqvist}, title = {Boundary Summation Equations}, institution = it, department = tdb, year = 2004, number = {2004-042}, month = sep, abstract = {A new solution method for systems of partial difference equations is presented. It can be seen as a discrete counterpart of boundary integral equations, but with sums instead of integrals. The number of unknowns in systems of linear difference equations with constant coefficients defined on uniform $d$-dimensional grids are reduced so that one dimension is eliminated. The reduction is obtained using fundamental solutions of difference operators, yielding a reduced system that is dense. The storage of the reduced system requires $\mathcal{O}(N)$ memory positions, where $N$ is the length of the original vector of unknowns. The application of the matrix utilizes fast Fourier transform as its most complex operation, and requires hence $\mathcal{O}(N\log N)$ arithmetic operations. Numerical experiments are performed, exploring the behavior of GMRES when applied to reduced systems originating from discretizations of partial differential equations. Model problems are chosen to include scalar equations as well as systems, with various boundary conditions, and on differently shaped domains. The new solution method performs well for an upwind discretization of an inviscid flow-problem. A proof of grid independent convergence is given for a simpler iterative method applied to a specific discretization of a first order differential equation. The numerical experiments indicate that this property carries over to many other problems in the same class. 
} } @TechReport{ it:2004-041, author = {Paul Pettersson and Wang Yi (Eds.)}, title = {Proceedings of the 16th Nordic Workshop on Programming Theory}, institution = it, department = docs, year = 2004, number = {2004-041}, month = oct, abstract = {The objective of the Nordic Workshop on Programming Theory is to bring together researchers from (but not limited to) the Nordic and Baltic countries interested in programming theory, in order to improve mutual contacts and cooperation. The 16\emph{th} Nordic Workshop on Programming Theory took place at the Uppsala University, Sweden, 6-8 October 2004. The previous workshops were held in Uppsala (1989 and 1999), Aalborg (1990), Gothenburg (1991 and 1995), Bergen (1992 and 2000), Turku (1993, 1998, and 2003), Aarhus (1994), Oslo (1996), Tallinn (1997 and 2002), and in Lyngby (2001). There were 39 regular presentations at the workshop, arranged in two parallel sessions. In addition the following five invited speakers gave presentations in plenary sessions: Erik Hagersten (Uppsala Univ., Sweden), Neil D. Jones (Copenhagen Univ., Denmark), Kim G. Larsen (Aalborg Univ., Denmark), P.S. Thiagarajan (National University of Singapore), and Michael Williams (Ericsson, Sweden). } } @TechReport{ it:2004-040, author = {Magnus Sv{\"a}rd and Jan Nordstr{\"o}m}, title = {On the Order of Accuracy for Difference Approximations of Initial-Boundary Value Problems}, institution = it, department = tdb, year = 2004, number = {2004-040}, month = sep, abstract = {Finite difference approximations of the second derivative in space appearing in, parabolic, incompletely parabolic systems of, and second order hyperbolic, partial differential equations are considered. If the solution is pointwise bounded, we prove that finite difference approximations of those classes of equations can be closed with two orders less accuracy at the boundary without reducing the global order of accuracy. 
This result is generalised to initial-boundary value problems with an $m$th order principal part. Then, the boundary accuracy can be lowered $m$ orders. Further, it is shown that summation-by-parts operators with approximating second derivatives are pointwise bounded. Linear and nonlinear computations corroborate the theoretical results.} } @TechReport{ it:2004-039, author = {Jing Gong and Jan Nordstr{\"o}m}, title = {A Stable Hybrid Method for Hyperbolic Problems}, institution = it, department = tdb, year = 2004, number = {2004-039}, month = sep, abstract = {A stable hybrid method that combines the unstructured finite volume method with high order finite difference methods has been developed. The coupling procedure is based on energy estimates and stability can be guaranteed. Numerical calculations verify that the hybrid method is efficient and accurate.} } @TechReport{ it:2004-038, author = {Martin Nilsson}, title = {Different Methods that Reduce Cost in Monostatic {RCS} Computations for {MOM} Accelerated by {MLFMA}}, institution = it, department = tdb, year = 2004, number = {2004-038}, month = sep, note = {To appear in Proceedings of EMB04 - Computational Electromagnetics - Methods and Applications, October 18-19, 2004}, abstract = {The problem of computing the monostatic Radar Cross Section with the Method of Moments accelerated by the multilevel Fast Multipole algorithm is considered. For electrically large objects the problem becomes too expensive in terms of computational work. This paper suggests two methods that can reduce the work, the Minimal Residual Interpolation method and a new fast approximate method related to the Fast Physical Optics method. 
Numerical experiments show that the new method gives reasonable results, while reducing the work substantially for large objects.} } @TechReport{ it:2004-037, author = {Torsten S{\"o}derstr{\"o}m and Mei Hong}, title = {Identification of Dynamic Errors-in-Variables Systems with Periodic Data}, institution = it, department = syscon, year = 2004, number = {2004-037}, month = aug, abstract = {Using instrumental variable methods to estimate the parameters of dynamic errors-in-variables systems with a periodic input signal is the focus in this report. How to choose suitable instrumental variable vectors is the key point. Four variants are proposed; all of them can generate consistent estimates. An analysis shows that the best accuracy is achieved by using a specific overdetermined instrumental variable vector. Numerical illustrations demonstrate the effectiveness of the proposed IV3 method for both white and colored measurement noise. It is superior to alternative methods under low signal to noise ratios.} } @TechReport{ it:2004-036, author = {Stefan Johansson}, title = {High Order Finite Difference Operators with the Summation by Parts Property Based on {DRP} Schemes}, institution = it, department = tdb, year = 2004, number = {2004-036}, month = aug, abstract = {Strictly stable high order finite difference methods based on Tam and Webb's dispersion relation preserving schemes have been constructed. The methods have been implemented for a 1D hyperbolic test problem, and the theoretical order of accuracy is observed.} } @TechReport{ it:2004-035, author = {Emad Abd-Elrady and Torsten S{\"o}derstr{\"o}m}, title = {Bias Analysis in Least Squares Estimation of Periodic Signals Using Nonlinear {ODE}'s}, institution = it, department = syscon, year = 2004, number = {2004-035}, month = aug, abstract = {Periodic signals can be modeled by means of second-order nonlinear ordinary differential equations (ODE's). 
The right hand side function of the ODE is parameterized in terms of known basis functions. The least squares algorithm developed for estimating the coefficients of these basis functions gives biased estimates, especially at low signal to noise ratios. This is due to noise contributions to the periodic signal and its derivatives evaluated using finite difference approximations. In this paper an analysis for this bias is given.} } @TechReport{ it:2004-034, author = {Parosh Abdulla and Pritha Mahata and Richard Mayr}, title = {Decidability of Zenoness, Token Liveness and Boundedness of Dense-Timed Petri Nets}, institution = it, department = docs, year = 2004, number = {2004-034}, month = aug, note = {To appear in FSTTCS '04}, abstract = {We consider 'Timed Petri Nets (TPNs)' : extensions of Petri nets in which each token is equipped with a real-valued clock. We consider the following three verification problems for TPN. (i) 'Zenoness:' whether there is an infinite computation from a given marking which takes only a finite amount of time. We show decidability of zenoness for TPNs, thus solving an open problem from \cite{Escrig:etal:TPN}. (ii) 'Token liveness:' whether a token is {\it alive} in a marking, i.e., whether there is a computation from the marking which eventually consumes the token. We show decidability of the problem by reducing it to the 'coverability problem', which is decidable for TPNs. (iii) 'Boundedness:' whether the size of the reachable markings is bounded. We consider two versions of the problem; namely 'semantic boundedness' where only live tokens are taken into consideration in the markings, and 'syntactic boundedness' where also dead tokens are considered. 
We show undecidability of semantic boundedness, while we prove that syntactic boundedness is decidable through an extension of the Karp-Miller algorithm.} } @TechReport{ it:2004-033, author = {Parosh Abdulla and Johann Deneux and Pritha Mahata}, title = {Closed, Open and Robust Timed Networks}, institution = it, department = docs, year = 2004, number = {2004-033}, month = aug, note = {To appear in Infinity '04}, abstract = {We consider verification of safety properties for parameterized systems of timed processes, so called 'timed networks'. A timed network consists of a finite state process, called a controller, and an arbitrary set of identical timed processes. In [AJ03] it was shown that checking safety properties is decidable in the case where each timed process is equipped with a single real-valued clock. In [ADM04], we showed that this is no longer possible if each timed process is equipped with at least two real-valued clocks. In this paper, we study two subclasses of timed networks: 'closed' and 'open' timed networks. In closed timed networks, all clock constraints are non-strict, while in open timed networks, all clock constraints are strict (thus corresponds to syntactic removal of equality testing). We show that the problem becomes decidable for closed timed network, while it remains undecidable for open timed networks. We also consider 'robust' semantics of timed networks by introducing timing fuzziness through semantic removal of equality testing. 
We show that the problem is undecidable both for closed and open timed networks under the robust semantics.} } @TechReport{ it:2004-032, author = {Malin Ljungberg and Kurt Otto}, title = {Curvilinear Coordinates in a {PDE} Solver Framework; Validation}, institution = it, department = tdb, year = 2004, number = {2004-032}, month = aug, abstract = {Through the use of object-oriented analysis and design combined with variability modeling, a highly flexible software model for the metrics handling functionality of a PDE solver framework was obtained. This new model was validated against usability goals, particularly with respect to efficiency and flexibility. The method of validation together with the results are presented here. The efficiency of a pilot implementation is comparable to, in parts even higher than, that of a pre-existing application-specific reference code. With regards to flexibility it is shown that the new software model performs well for a set of four change scenarios selected by an expert user group.} } @TechReport{ it:2004-031, author = {Malin Ljungberg}, title = {Curvilinear Coordinates in a {PDE} Solver Framework; Analysis}, institution = it, department = tdb, year = 2004, number = {2004-031}, month = aug, abstract = {In the present work we demonstrate how the use of an object oriented analysis and design process, augmented by variability modeling and user centered design, results in a flexible software model for handling curvilinear coordinates in a finite difference PDE solver framework. It is shown that feature and variability modeling are appropriate tools for identifying and achieving the appropriate flexibility in this model. 
The resulting software model is validated through a comparative analysis involving a range of other similar models.} } @TechReport{ it:2004-030, author = {Wendy Kress and Per L{\"o}tstedt}, title = {Time Step Restrictions using Semi-Implicit Methods for the Incompressible Navier-Stokes Equations}, institution = it, department = tdb, year = 2004, number = {2004-030}, month = jul, abstract = {The incompressible Navier-Stokes equations are discretized in space and integrated in time by the method of lines and a semi-implicit method. In each time step a set of systems of linear equations has to be solved. The size of the time steps is restricted by stability and accuracy of the time-stepping scheme, and convergence of the iterative methods for the solution of the systems of equations. The stability is investigated with a linear model equation derived from the Navier-Stokes equations. The resolution in space and time is estimated from turbulent flow physics. The convergence of the iterative solvers is studied using the same model equation. The stability constraints and the convergence rate obtained from the model equation are compared to results for a semi-implicit integrator of the Navier-Stokes equations with good agreement. The most restrictive bound on the time step is given by accuracy, stability, or convergence depending on the flow conditions and the numerical method.} } @TechReport{ it:2004-029, author = {Krister {\AA}hlander and Hans Munthe-Kaas}, title = {On Applications of the Generalized Fourier Transform in Numerical Linear Algebra}, institution = it, department = tdb, year = 2004, number = {2004-029}, month = jul, abstract = {Matrices equivariant under a group of permutation matrices are considered. Such matrices typically arise in numerical applications where the computational domain exhibits geometrical symmetries. 
In these cases, group representation theory provides a powerful tool for block diagonalizing the matrix via the Generalized Fourier Transform. This technique yields substantial computational savings in problems such as solving linear systems, computing eigenvalues and computing analytic matrix functions. The theory for applying the Generalized Fourier Transform is explained, building upon the familiar special (finite commutative) case of circulant matrices being diagonalized with the Discrete Fourier Transform. The classical convolution theorem and diagonalization results are generalized to the non-commutative case of block diagonalizing equivariant matrices. Our presentation stresses the connection between multiplication with an equivariant matrix and the application of a convolution. This approach highlights the role of the underlying mathematical structures such as the group algebra, and it also simplifies the application of \textit{fast} Generalized Fourier Transforms. The theory is illustrated with a selection of numerical examples.} } @TechReport{ it:2004-028, author = {Lars-Henrik Eriksson}, title = {Using Formal Methods in a Retrospective Safety Case}, institution = it, department = csd, year = 2004, number = {2004-028}, month = jul, note = {Accepted for publication in the Proceedings of the 23rd International Conference on Computer Safety, Reliability and Security (SAFECOMP'04)}, abstract = {Today the development of safety-critical systems is to a large extent guided by standards that make demands on both development process and system quality. Before the advent of these standards, development was typically done on a ``best practise'' basis which could differ much between application areas. Some safety-critical systems (e.g. railway interlockings) have a long technical and economical lifetime so that today we have many legacy safety-critical systems in operation which were developed according to practises that would be regarded as unacceptable today. 
Usually, such systems are allowed to continue operating by virtue of past performance. If there is doubt about the integrity of a legacy system, an alternative to replacement could be making a ``retrospective'' safety case demonstrating that the legacy system is indeed safe to use. Using as example a case taken from railway signalling, we will show how formal verification can be used in a retrospective safety case. In this application of formal methods several particular problems arise, such as uncertainty about the original requirements and the required safety level of the various system functions. We will discuss such problems and the approach taken to deal with them in the example case.} } @TechReport{ it:2004-027, author = {Thorild Sel{\'e}n}, title = {Reorganisation in the Skewed-Associative {TLB}}, institution = it, department = docs, year = 2004, number = {2004-027}, month = jun, note = {M.Sc. thesis}, abstract = {One essential component and a common bottleneck in current virtual memory systems is the \emph{translation lookaside buffer} (TLB), a small, specialised cache that speeds up memory accesses by storing recently used address translations. A TLB can be viewed as a hash table that only has the capacity for holding a subset of the actively used address translations. The traditional way to increase the performance of a TLB (other than making it larger) is to increase associativity, typically performing multiple comparisons in parallel to avoid slowing down lookups; however, this is expensive in terms of chip area and energy consumption. Skewed associativity, i.e. using several different hash functions for parallel lookups, has been demonstrated to yield good results with less parallelism and therefore at a lower cost. In skewed-associative models, the sets of possible placements for two entries may only partially overlap. Thus, the current placement of entries will limit future replacement possibilities. 
This is an inherent inflexibility in traditional skewed-associative models, since we cannot predict which placements will enable the most desirable future replacement choices. This thesis demonstrates how the performance of skewed-associative TLB models can be enhanced further by \emph{reorganisation} --- moving old entries around to allow for more efficient replacements. This gives even more efficient usage of TLB locations, increasing performance without further complicating lookups. The thesis introduces and demonstrates a \emph{collision tendency} metric that enables simple comparison of the conflict miss vulnerability for a multitude of associativity models and degrees of associativity over a large range of sizes. Simulations demonstrate that using skewed-associative techniques and reorganisation, efficient TLBs can be implemented with far less parallelism in hardware, allowing for more compact and much less energy-consuming designs without sacrificing performance. Additionally, this thesis discusses adapting the skewed-associative TLB with reorganisation to handle real-time requirements, notably in applications where tasks with different real-time needs are run concurrently.} } @TechReport{ it:2004-026, author = {David Lundberg}, title = {Ad Hoc Protocol Evaluation and Experiences of Real World Ad Hoc Networking}, institution = it, department = docs, year = 2004, number = {2004-026}, month = jun, note = {M.Sc. thesis}, abstract = {This report gives an introduction to ad hoc networking and presents a list of over 60 ad hoc routing protocols that have been proposed between 199x and 2002. It also discusses the problems of performance evaluation of these protocols and the experiences gathered during the implementation of a real world evaluation testbed. A major challenge when evaluating ad hoc routing protocol proposals is to agree on a metric under which a protocol's performance will be measured. 
The fact that most proposed ad hoc protocols have different goals makes it very important to find the essential properties and invent methods how to measure them. This is the main focus in this report. The first part discusses the methods and metrics used in simulations performed during recent years. The results show that mobility models, traffic patterns, metrics and propagation models are crucial when doing simulations in order to get valid results. The second part of this paper describes a new metric called virtual mobility (vM) describing the mobility from a physical point of view opposed to geometrical or link-stability metrics. It also describes the APE-testbed (Ad hoc Protocol Evaluation) that we have created in order to be able to conduct large scale experiments in a real environment. A lot of effort was put on making the testbed streamlined and as easy to use as possible.} } @TechReport{ it:2004-025, author = {David Lundberg}, title = {Feasibility Study of {WLAN} Technology for the Uppsala - Stockholm Commuter Train}, institution = it, department = docs, year = 2004, number = {2004-025}, month = jun, abstract = {The purpose of this study is to investigate different solutions to provide high speed Internet access on a train travelling at 200 km/h using WLAN equipment. Every day about 15000 people commute between Uppsala and Stockholm and it is reasonable to assume that about 10\% of those could use the time better on the train if they had a connection to the Internet. A study~\cite{survey} in the UK has shown that 80\% of the business travellers work on the trains and would use WLAN if it was available. At least two companies in Sweden offer a service for this but they use expensive technology with low bandwidth. The aim of this feasibility study is to investigate if it is possible to build an infrastructure with WLAN equipment and offer the customer the bandwidth of about a normal broadband connection at home. 
The task is also to make estimations of the costs for different solutions to help decide about a continuation of the project.} } @TechReport{ it:2004-024, author = {Mathias Spjuth and Martin Karlsson and Erik Hagersten}, title = {Low Power and Conflict Tolerant Cache Design}, institution = it, department = docs, year = 2004, number = {2004-024}, month = may, abstract = {The common approach to reduce cache conflicts is to increase the associativity. From a dynamic cache power perspective this associativity comes at a high cost. In this paper we present miss ratio performance and dynamic power estimates for a skewed cache and also for a new organization proposed, the elbow cache. The elbow cache extends the skewed cache organization with a relocation strategy for conflicting blocks. We show that these skewed designs significantly reduce the conflict problems while consuming up to 56\% less dynamic power than a comparably performing 8-way set-associative cache.} } @TechReport{ it:2004-023, author = {Jarmo Rantakokko}, title = {Interactive Learning of Algorithms}, institution = it, department = tdb, year = 2004, number = {2004-023}, month = may, note = {Accepted to \emph{Third Program Visualization Workshop} (PVW'2004)}, abstract = {Visualization is believed to be an effective technique for learning and understanding algorithms in traditional computer science. In this paper, we focus on parallel computing and algorithms. An inherent difficulty with parallel programming is that it requires synchronization and coordination of the concurrent activities. We want to use visualization to help students to understand how the processors work together in an algorithm and how they interact through communication. To conceptualize this we have used two different visualization techniques, computer animations and role plays. 
As the students can see how the processors run simultaneously in parallel, it illustrates important concepts such as processor load balance, serialization bottlenecks, synchronization and communication. The results show that both animations and role plays are better for learning and understanding algorithms than the textbook.} } @TechReport{ it:2004-022, author = {Pascal Van Hentenryck and Pierre Flener and Justin Pearson and Magnus {\AA}gren}, title = {Compositional Derivation of Symmetries for Constraint Satisfaction}, institution = it, department = csd, year = 2004, number = {2004-022}, month = may, abstract = {This paper reconsiders the problems of discovering symmetries in constraint satisfaction problems (CSPs). It proposes a compositional approach which derives symmetries of the applications from primitive constraints. Its key insight is the recognition of the special role of global constraints in symmetry detection. Once the symmetries of global constraints are available, it often becomes much easier to derive symmetries compositionally and efficiently. The paper demonstrates the potential of this approach by studying several classes of value and variable symmetries and applying the resulting techniques to two non-trivial applications. The paper also discusses the potential of reformulations and high-level modelling abstractions to strengthen symmetry discovery.} } @TechReport{ it:2004-021, author = {Erik Berg and Erik Hagersten}, title = {Efficient Data-Locality Analysis of Long-Running Applications}, institution = it, department = docs, year = 2004, number = {2004-021}, month = may, abstract = {Analysis of application data cache behavior is important for program optimization and architectural design decisions. Current methods include hardware monitoring and simulation, but these methods lack from either limited flexibility or large run-time overhead that prevents realistic workloads. This paper describes a new fast and flexible tool based on StatCache. 
This tool is based on a probabilistic cache model instead of a functional cache simulator and use sparsely sampled run-time information instead of complete traces or sampled contiguous subtraces. A post-run analyzer calculates miss ratios of fully associative caches of arbitrary size and cache line size, from statistics gathered at a single run. It can also produce various data-locality metrics and give data-structure centric data-locality figures. The implementation utilizes simple-hardware and operating-system support available in most operating systems and runs uninstrumented optimized code. We evaluate the method using the SPEC benchmark suite using the largest (ref) input sets and show that the accuracy is high. We also show the run-time overhead for this flexible ``cache simulator'' to be less than 20\% for long-running applications, much faster than current simulators.} } @TechReport{ it:2004-020, author = {Malin Ljungberg and Krister {\AA}hlander}, title = {Generic Programming Aspects of Symmetry Exploiting Numerical Software}, institution = it, department = tdb, year = 2004, number = {2004-020}, month = may, note = {To appear in the proceedings of the mini-symposium ``Software Concepts and Free Software for PDEs'' of the ECCOMAS 2004 congress, Jyv{\"a}skyl{\"a}, Finland, 24-28 July 2004}, abstract = {The use of the generalized Fourier transform as a means to diagonalize certain types of equivariant matrices, and thus speeding up the solution of numerical systems, is discussed. Such matrices may arise in various applications with geometrical symmetries, for example when the boundary element method is used to solve an electrostatic problem in the exterior of a symmetric object. The method is described in detail for an object with a triangular symmetry, and the feasibility of the method is confirmed by numerical experiments. The design of numerical software for this kind of applications is a challenge. 
It is argued that generic programming is very suitable in this context, mainly because it is type safe and promotes polymorphism capabilities in link time. A generic C++ design of important mathematical abstractions such as groups, vector spaces, and group algebras, is outlined, illustrating the potential provided by generative programming techniques. The integration of explicit support for various data layouts for efficiency tuning purposes is discussed. } } @TechReport{ it:2004-019, author = {Owe Axelsson and Maya Neytcheva}, title = {Eigenvalue Estimates for Preconditioned Saddle Point Matrices}, institution = it, department = tdb, year = 2004, number = {2004-019}, month = may, abstract = {New eigenvalue bounds for symmetric matrices of saddle point form are derived and applied for preconditioned versions of the matrices. The preconditioners enable efficient iterative solution of the corresponding linear systems with, for some important applications, an optimal order of computational complexity.} } @TechReport{ it:2004-018, author = {Emmanuel Papaioannou and Erik Bor{\"a}lv and Athanasios Demiris and Niklas Johansson and Nikolaos Ioannidis}, title = {User Interface Design for Multi-platform Interactive Sports Content Broadcasting}, institution = it, department = hci, year = 2004, number = {2004-018}, month = may, note = {Accepted to AVI 2004 (Advanced Visual Interfaces), Gallipoli (Lecce), Italy}, abstract = {The new generation of television viewers is currently being confronted and becoming acquainted with a series of technological developments in the realm of consumer electronics and gaming that raise their expectations for similar advances in TV broadcasts. The MELISA platform aims at the cross-media broadcasting of sports events featuring interactive advertising and sports-related games over digital television and next generation mobile network infrastructures. 
The platform provides services for optimal presentation of complex interactive real time video content, for advertisement and an advanced real-time gaming (betting) engine in at least two different client platforms. User interface design is a major issue in a complex end-to-end solution having to cater to the needs of users ranging from broadcasting professionals to end-users. Especially in the case of interactive gaming there are numerous challenges in the user interface design, in order to deliver to all categories of devices (and end users) equal quantity and quality of information. In this paper we present the overall system architecture and philosophy and then focus on user interface design issues both for the routine work of broadcasting professionals as well as the end users, owners of different types of consumer devices (such as PDAs and interactive TV Set Top Boxes).} } @TechReport{ it:2004-017, author = {Parosh Aziz Abdulla and Aletta Nyl{\'e}n}, title = {Better-Structured Transition Systems}, institution = it, department = docs, year = 2004, number = {2004-017}, month = apr, abstract = {In automated verification of infinite-state systems, a variety of algorithms that operate on constraints representing sets of states have been developed. Many of these algorithms rely on well quasi-ordering of the constraint system for proving termination. A number of methods for generating new well quasi-ordered constraint systems have been proposed. However, many of these constraint systems suffer from constraint explosion as the number of constraints generated during analysis grows exponentially with the size of the problem. We suggest using the theory of better quasi-ordering to prove termination since that will allow generation of constraint systems that are less prone to constraint explosion. We also present a method to derive such constraint systems. 
We introduce existential zones, a constraint system for verification of systems with an unbounded number of clocks and use our methodology to prove that existential zones are better quasi-ordered. We show how to use existential zones in verification of timed Petri nets and present some experimental results. Finally, we present several other constraint systems which have been derived using our methodology.} } @TechReport{ it:2004-016, author = {Maya Neytcheva and Erik B{\"a}ngtsson and Bj{\"o}rn Lund}, title = {Numerical Solution Methods for Glacial Rebound Models}, institution = it, department = tdb, year = 2004, number = {2004-016}, month = apr, note = {Typographic corrections April 2005}, abstract = {We consider the finite element discretization of the system of partial differential equations describing the stress field and the displacements in a (visco)elastic inhomogeneous layered media in response to a surface load. The underlying physical phenomenon, which is modelled, is glacial advance and recession, and the resulting crustal stress state. We analyse the elastic case in more detail and present discretization error estimates. The so-obtained linear system of equations is solved by an iterative solution method with suitable preconditioning and numerical experiments are presented.} } @TechReport{ it:2004-015, author = {Magnus {\AA}gren}, title = {Set Variables and Local Search}, institution = it, department = csd, year = 2004, number = {2004-015}, month = apr, note = {Updated May 2004}, abstract = {Many combinatorial (optimisation) problems have natural models based on, or including, set variables and set constraints. This modelling device has been around for quite some time in the constraint programming area, and proved its usefulness in many applications. This paper introduces set variables and set constraints also in the local search area. 
It presents a way of representing set variables in the local search context, where we deal with concepts like transition functions, neighbourhoods, and penalty costs. Furthermore, some common set constraints and their penalty costs are defined. These constraints are later used to model three problems and some initial experimental results are reported.} } @TechReport{ it:2004-014, author = {P{\"a}r Samuelsson and Bj{\"o}rn Halvarsson and Bengt Carlsson}, title = {Analysis of the Input-Output Couplings in a Wastewater Treatment Plant Model}, institution = it, department = syscon, year = 2004, number = {2004-014}, month = apr, abstract = {This paper considers the problem of channel interaction in multivariable systems. As an application, nitrate removal in the activated sludge process in a wastewater treatment plant is studied. To evaluate the degree of channel interaction, two different tools are compared; the well known Relative Gain Array (RGA) and the more recently developed Hankel Interaction Index Array (HIIA). The results of the analysis are discussed from a process knowledge point of view, and are also illustrated with some control experiments. The main conclusion is that both the analysis tools provide reasonable results in this case. The HIIA, however, gives a deeper insight about the actual cross couplings in the system. This insight may also be used in order to design suitable structured multivariable controllers.} } @TechReport{ it:2004-013, author = {Andreas Westling}, title = {Inter-Networking {MPLS} and {SelNet}}, institution = it, department = docs, year = 2004, number = {2004-013}, month = apr, note = {M.Sc. thesis}, abstract = {The growing amount of traffic on the Internet has led to regular IP switching technique becoming more and more inadequate. Since more IP switching routers have to be purchased in order to co-operate with the traffic the costs are rising for the ISPs. 
This has led to extensive research on how to lower the cost for the end-to-end computation. One of the ways in doing so is to use a tag-switching approach. This thesis will give a brief introduction to two tag-switching architectures, MPLS and SelNet, and show how these can co-operate despite their differences.} } @TechReport{ it:2004-012, author = {Parosh Aziz Abdulla and Johann Deneux and Pritha Mahata}, title = {Multi-Clock Timed Networks}, institution = it, department = docs, year = 2004, number = {2004-012}, month = apr, abstract = {We consider verification of safety properties for parameterized systems of timed processes, so called {\it timed networks}. A timed network consists of a finite state process, called a controller, and an arbitrary set of identical timed processes. In a previous work, we showed that checking safety properties is decidable in the case where each timed process is equipped with a single real-valued clock. It was left open whether the result could be extended to multi-clock timed networks. We show that the problem becomes undecidable when each timed process has two clocks. On the other hand, we show that the problem is decidable when clocks range over a discrete time domain. This decidability result holds when processes have any finite number of clocks.} } @TechReport{ it:2004-011, author = {Henrik Bj{\"o}rklund and Sven Sandberg and Sergei Vorobyov}, title = {Randomized Subexponential Algorithms for Infinite Games}, institution = it, department = csd, year = 2004, number = {2004-011}, month = apr, abstract = {The complexity of solving infinite games, including parity, mean payoff, and simple stochastic games, is an important open problem in verification, automata theory, and complexity theory. In this paper we develop an abstract setting for studying and solving such games, as well as related problems, based on function optimization over certain discrete structures. 
We introduce new classes of completely local-global (CLG) and recursively local-global (RLG) functions, and show that strategy evaluation functions for parity and simple stochastic games belong to these classes. We also establish a relation to the previously well-studied completely unimodal (CU) and local-global functions. A number of nice properties of CLG-functions are proved. In this setting, we survey several randomized optimization algorithms appropriate for CU-, CLG-, and RLG-functions. We show that the subexponential algorithms for linear programming by Kalai and Matousek, Sharir, and Welzl, can be adapted to optimizing the functions we study, with preserved subexponential expected running time. We examine the relations to two other abstract frameworks for subexponential optimization, the LP-type problems of Matousek, Sharir, Welzl, and the abstract optimization problems of G{\"a}rtner. The applicability of our abstract optimization approach to parity games builds upon a discrete strategy evaluation measure. We also consider local search type algorithms, and settle two nontrivial, but still exponential, upper bounds. As applications we address some complexity-theoretic issues including non-PLS-completeness of the problems studied.} } @TechReport{ it:2004-010, author = {Neil Ghani and Kidane Yemane and Bj{\"o}rn Victor}, title = {Relationally Staged Computations in Calculi of Mobile Processes}, institution = it, department = docs, year = 2004, number = {2004-010}, month = mar, note = {Accepted for CMCS 2004}, abstract = {We apply the recently developed techniques of higher order abstract syntax and functorial operational semantics to give a compositional and fully abstract semantics for the $\pi$-calculus equipped with open bisimulation. 
The key novelty in our work is the realisation that the sophistication of open bisimulation requires us to move from the usual semantic domain of presheaves over subcategories of $\mathbf{Set}$ to presheaves over subcategories of $\mathbf{Rel}$. This extra structure is crucial in controlling the renaming of extruded names and in providing a variety of different dynamic allocation operators to model the different binders of the $\pi$-calculus. } } @TechReport{ it:2004-009, author = {Henrik Johansson and Johan Steensland}, title = {A Characterization of a Hybrid and Dynamic Partitioner for {SAMR} Applications}, institution = it, department = tdb, year = 2004, number = {2004-009}, month = mar, abstract = {Significantly improving the scalability of large structured adaptive mesh refinement (SAMR) applications is challenging. It requires sophisticated capabilities for using the underlying parallel computer's resources in the most efficient way. This is non-trivial, since the basic conditions for how to allocate the resources change dramatically during run-time due to the dynamics inherent in these applications. This report presents a first characterization of a hybrid and dynamic partitioner for parallel SAMR applications. Specifically, we determine optimal parameter settings for trade-offs like communication vs.\ load balance and speed vs.\ quality. The key contribution is that the characterization enables the partitioner to respond accurately to stimuli from system and application state, and hence adapting to various SAMR scenarios. 
This potentially reduces run-time for large SAMR applications.} } @TechReport{ it:2004-008, author = {P{\"a}r Samuelsson and Bengt Carlsson}, title = {An Integrating Linearization Method for Static Input Nonlinearities}, institution = it, department = syscon, year = 2004, number = {2004-008}, month = mar, abstract = {This report considers a method for linearization of models containing static input nonlinearities in series with a linear model, so called Hammerstein models. The method is based on exact linearization via internal feedback and is performed by differentiating the nonlinearity with respect to the input signal. Using this approach, an integration is added in the loop gain of the linearized system via the internal feedback. Standard linear design strategies can be used in order to construct a control law for the resulting integrating linearized system. The method presented here should be seen as an alternative to the standard method that utilizes the inverse of the static nonlinearity. Particularly, in cases when analytic inversion is difficult, the presented method provides an attractive alternative. An extension of the method where the static nonlinearity is fed by both the input and output signals is also presented and illustrated with some simulation results.} } @TechReport{ it:2004-007, author = {Erik Nordstr{\"o}m and Per Gunningberg and Christian Tschudin}, title = {Comparison of Gateway Forwarding Strategies in Ad hoc Networks}, institution = it, department = docs, year = 2004, number = {2004-007}, month = mar, abstract = {This paper studies the efficient integration of gateway forwarding strategies in wireless ad hoc networks. The problem to solve is the forwarding to one or more gateways in an environment where there is no hierarchical addressing. We provide an overview of the challenges in this area and then compare the properties of two proposed forwarding strategies; traditional default routes and tunnels. 
We find that default route forwarding will not operate efficiently in a multi-hop environment and that it will, without modifications, operate incorrectly with multiple gateways. On the other hand, we find tunnel forwarding to be architecturally appealing with many properties that make it a suitable forwarding strategy with multiple gateways. We have implemented default route and tunnel forwarding in the AODV routing protocol. We verify in simulation that the incorrect forwarding of default routes has adverse effects on TCP and that the modifications we suggest improve performance, although they do not completely solve the problem. Our simulations also show that our tunnel implementation is efficient and works well with TCP, even with multiple gateways.} } @TechReport{ it:2004-006, author = {Henrik L{\"o}f and Markus Nord{\'e}n and Sverker Holmgren}, title = {Improving Geographical Locality of Data for Shared Memory Implementations of {PDE} Solvers}, institution = it, department = tdb, year = 2004, number = {2004-006}, month = feb, abstract = {On cc-NUMA multi-processors, the non-uniformity of main memory latencies motivates the need for co-location of threads and data. We call this special form of data locality, \emph{geographical locality}, as the non-uniformity is a consequence of the physical distance between the cc-NUMA nodes. In this article, we compare the well established method of exploiting the first-touch strategy using parallel initialization of data to an application-initiated page migration strategy as means of increasing the geographical locality for a set of important scientific applications. Four PDE solvers parallelized using OpenMP are studied; two standard NAS NPB3.0-OMP benchmarks and two kernels from industrial applications. The solvers employ both structured and unstructured computational grids. 
The main conclusions of the study are: (1) that geographical locality is important for the performance of the applications, (2) that application-initiated migration outperforms the first-touch scheme in almost all cases, and in some cases even results in performance which is close to what is obtained if all threads and data are allocated on a single node. We also suggest that such an application-initiated migration could be made fully transparent by letting the OpenMP compiler invoke it automatically.} } @TechReport{ it:2004-005, author = {H{\aa}kan Zeffer and Zoran Radovic and Oskar Grenholm and Erik Hagersten}, title = {Evaluation, Implementation and Performance of Write Permission Caching in the {DSZOOM} System}, institution = it, department = docs, year = 2004, number = {2004-005}, month = feb, note = {Updated June 2004}, abstract = {Fine-grained software-based distributed shared memory (SW-DSM) systems typically maintain coherence with in-line checking code at load and store operations to shared memory. The instrumentation overhead of this added checking code can be severe. This paper (1) shows that most of the instrumentation overhead in the fine-grained DSZOOM SW-DSM system is store related, (2) introduces a new write permission cache (WPC) technique that exploits spatial store locality and batches coherence actions at runtime, (3) evaluates WPC and (4) presents WPC results when implemented in a real SW-DSM system. 
On average, the WPC reduces the store instrumentation overhead in DSZOOM by 42~(67) percent for benchmarks compiled with maximum (minimum) compiler optimizations.} } @TechReport{ it:2004-004, author = {Torbj{\"o}rn Wigren}, title = {Recursive Prediction Error Identification of Nonlinear State Space Models}, institution = it, department = syscon, year = 2004, number = {2004-004}, month = jan, abstract = {A recursive prediction error algorithm for identification of systems described by nonlinear ordinary differential equation (ODE) models is presented. The model is a MIMO ODE model, parameterized with coefficients of a multi-variable polynomial that describes one component of the right hand side function of the ODE. It is explained why such a parameterization is a key to obtain a well defined algorithm, that does not suffer from singularities and over-parameterization problems. Furthermore, it is proved that the selected model can also handle systems with more complicated right hand side structure, by identification of an input-output equivalent system in the coordinate system of the selected states. The linear output measurements can be corrupted by zero mean disturbances that are correlated between measurements and over time. The disturbance correlation matrix is estimated on-line and need not be known beforehand. The algorithm is applied to live data from a system consisting of two cascaded tanks with free outlets. It is illustrated that the identification algorithm is capable of producing a highly accurate nonlinear model of the system, despite the fact that the right hand side structure of the system has two nontrivial nonlinear components. 
A novel technique based on scaling of the sampling period that significantly improves the numerical properties of the algorithm is also disclosed.} } @TechReport{ it:2004-003, author = {Johan Elf and Per L{\"o}tstedt and Paul Sj{\"o}berg}, title = {Problems of High Dimension in Molecular Biology}, institution = it, department = tdb, year = 2004, number = {2004-003}, month = jan, note = {This paper was presented at the 19th GAMM-Seminar in Leipzig, January 23-25, 2003.}, abstract = {The deterministic reaction rate equations are not an accurate description of many systems in molecular biology where the number of molecules of each species often is small. The master equation of chemical reactions is a more accurate stochastic description suitable for small molecular numbers. A computational difficulty is the high dimensionality of the equation. We describe how it can be solved by first approximating it by the Fokker-Planck equation. Then this equation is discretized in space and time by a finite difference method. The method is compared to a Monte Carlo method by Gillespie. The method is applied to a four-dimensional problem of interest in the regulation of cell processes.} } @TechReport{ it:2004-002, author = {Pablo Giambiagi and Gerardo Schneider and Frank D. Valencia}, title = {On the Expressiveness of {CCS}-like Calculi}, institution = it, department = docs, year = 2004, number = {2004-002}, month = jan, abstract = {In the literature there are several CCS-like process calculi, or CCS variants, differing in the constructs for the specification of infinite behavior and in the scoping rules w.r.t. channel names. In this paper we study various representatives of these calculi based upon both their relative expressiveness and the decidability of divergence (i.e., the existence of a divergent computation). We regard any two calculi as being equally expressive iff for every process in each calculus, there exists a weakly bisimilar process in the other. 
By providing weak bisimilarity preserving mappings among the various variants, we show that in the context of relabeling-free and finite summation calculi: (1) CCS with parameterless (or constant) definitions is equally expressive to the variant with parametric definitions. (2) The CCS variant with replication is equally expressive to that with recursive expressions and static scope. We also state that the divergence problem is undecidable for the calculi in (1) but decidable for those in (2). We obtain this from previous (un)decidability results and by showing the relevant mappings to be computable and to preserve divergence and its negation. From (1) and the well-known fact that parametric definitions can replace injective relabelings, we show that injective relabelings are redundant (i.e., derived) in CCS (which has constant definitions only).} } @TechReport{ it:2004-001, author = {Torsten S{\"o}derstr{\"o}m and Torbj{\"o}rn Wigren and Emad Abd-Elrady}, title = {Maximum Likelihood Modeling of Orbits of Nonlinear {ODE}s}, institution = it, department = syscon, year = 2004, number = {2004-001}, month = jan, abstract = {This report treats a new approach to the problem of periodic signal estimation. The idea is to model the periodic signal as a function of the state of a second order nonlinear ordinary differential equation (ODE). This is motivated by Poincar{\'e} theory which is useful for proving the existence of periodic orbits for second order ODEs. The functions of the right hand side of the nonlinear ODE are then parameterized, and a maximum likelihood algorithm is developed for estimation of the parameters of these unknown functions from the measured periodic signal. The approach is analyzed by derivation and solution of a system of ODEs that describes the evolution of the Cram{\'e}r-Rao bound over time. The proposed methodology reduces the number of estimated unknowns at least in cases where the actual signal generation resembles that of the imposed model. 
This in turn is expected to result in an improved accuracy of the estimated parameters.} } @TechReport{ it:2003-065, author = {Michael Baldamus and Richard Mayr and Gerardo Schneider}, title = {A Backward/Forward Strategy for Verifying Safety Properties of Infinite-State Systems}, institution = it, department = docs, year = 2003, number = {2003-065}, month = dec, abstract = {This paper has two main contributions: For one, we describe a general method for verifying safety properties of non-well-quasi-ordered infinite-state systems for which reachability is undecidable in general, the question being whether a set U of configurations is reachable. In many cases this problem can be solved as follows: First, one constructs a well-quasi-ordered overapproximation of the system in question. Thereby one can compute an overapproximation of the set Pre*(U) of all predecessors of U. Second, one performs an exact bounded forward search for U (starting at the initial state) which always stays inside the already computed overapproximation of Pre*(U), thus curbing the search space. This restricted forward search is more efficient than a normal forward search, yielding answers of the form YES, NO, or UNKNOWN, where the YES and NO answers are always correct. As our second main contribution herein, we apply our method to relabelling-free CCS with finite summation, which is already a process calculus for which reachability is undecidable. To our knowledge, this part is actually the first application of well-structured systems to verifying safety properties in process calculi. 
The application is done via a special Petri nets semantics for the calculus that we consider.} } @TechReport{ it:2003-064, author = {Arnim Br{\"u}ger and Bertil Gustafsson and Per L{\"o}tstedt and Jonas Nilsson}, title = {High Order Accurate Solution of the Incompressible Navier-Stokes Equations}, institution = it, department = tdb, year = 2003, number = {2003-064}, month = dec, abstract = {High order methods are of great interest in the study of turbulent flows in complex geometries by means of direct simulation. With this goal in mind, the incompressible Navier-Stokes equations are discretized in space by a compact fourth order finite difference method on a staggered grid. The equations are integrated in time by a second order semi-implicit method. Stable boundary conditions are implemented and the grid is allowed to be curvilinear in two space dimensions. In every time step, a system of linear equations is solved for the velocity and the pressure by an outer and an inner iteration with preconditioning. The convergence properties of the iterative method are analyzed. The order of accuracy of the method is demonstrated in numerical experiments. The method is used to compute the flow in a channel, the driven cavity and a constricted channel.} } @TechReport{ it:2003-063, author = {Michael Baldamus and Joachim Parrow and Bj{\"o}rn Victor}, title = {Spi Calculus Translated to $\pi$-Calculus Preserving May-Testing}, institution = it, department = docs, year = 2003, number = {2003-063}, month = dec, abstract = {We present a concise and natural encoding of the spi-calculus into the more basic $\pi$-calculus and establish its correctness with respect to a formal notion of testing. This is particularly relevant for security protocols modelled in spi since the tests can be viewed as adversaries. The translation has been implemented in a prototype tool. 
As a consequence, protocols can be described in the spi calculus and analysed with the emerging flora of tools already available for $\pi$. The translation also entails a more detailed operational understanding of spi since high level constructs like encryption are encoded in a well known lower level. The formal correctness proof is nontrivial and interesting in its own right; so called context bisimulations and new techniques for compositionality make the proof simpler and more concise.} } @TechReport{ it:2003-062, author = {Emilio Tuosto and Bj{\"o}rn Victor and Kidane Yemane}, title = {Polyadic History-Dependent Automata for the Fusion Calculus}, institution = it, department = docs, year = 2003, number = {2003-062}, month = dec, abstract = {We extend History Dependent Automata to handle polyadic labels, and using a new symbolic semantics of fusion calculus we give a mapping into these Polyadic HDA with Negative Transitions, and show that the mapping is adequate with respect to hyperequivalence in the fusion calculus. This lays the grounds for HD-automata-based tools applicable not only to the monadic pi-calculus but also to the fusion calculus and polyadic pi-calculus, allowing implementation efforts to be focused at a foundational level rather than being multiplied in several tools.} } @TechReport{ it:2003-061, author = {Lars Ferm and Per L{\"o}tstedt}, title = {Space-Time Adaptive Solution of First Order {PDE}s}, institution = it, department = tdb, year = 2003, number = {2003-061}, month = dec, abstract = {An explicit time-stepping method is developed for adaptive solution of time-dependent partial differential equations with first order derivatives. The space is partitioned into blocks and the grid is refined and coarsened in these blocks. The equations are integrated in time by a Runge-Kutta-Fehlberg method. The local errors in space and time are estimated and the time and space steps are determined by these estimates. 
The error equation is integrated to obtain global errors of the solution. The method is shown to be stable if one-sided space discretizations are used. Examples such as the wave equation, Burgers' equation, and the Euler equations in one space dimension with discontinuous solutions illustrate the method.} } @TechReport{ it:2003-060, author = {Pierre Flener}, title = {Realism in Project-Based Software Engineering Courses: Rewards, Risks, and Recommendations}, institution = it, department = csd, year = 2003, number = {2003-060}, month = dec, abstract = {A software engineering course is often the capstone of a general undergraduate curriculum in computer science. It is usually at least partly a project-based course, with the intention that student groups can deploy their already acquired skills on programming, verification, databases, and human-computer interaction, while applying the new material about requirements, architecture, and project management on a project. I have taught a software engineering course six times over the last few years, using a combination of ideas that I have never seen elsewhere, with a strong emphasis on realism. I here reflect on the rewards and risks of this approach, and make some recommendations for future offerings.} } @TechReport{ it:2003-059, author = {Jonas Persson and von Sydow, Lina}, title = {Pricing European Multi-asset Options Using a Space-time Adaptive FD-method}, institution = it, department = tdb, year = 2003, number = {2003-059}, month = dec, abstract = {In this paper we present an adaptive technique to solve the multi-dimensional Black-Scholes equation. The number of grid-points required for a given tolerance of the local discretization errors is reduced substantially when compared to a standard equidistant grid. 
Using our adaptive methods in space and time we have control of the local discretization errors and can refine the grid where needed for accuracy reasons.} } @TechReport{ it:2003-058, author = {Erik Berg and Erik Hagersten}, title = {StatCache: A Probabilistic Approach to Efficient and Accurate Data Locality Analysis}, institution = it, department = docs, year = 2003, month = dec, number = {2003-058}, abstract = {The widening memory gap reduces performance of applications with poor data locality. This problem can be analyzed using working-set graphs. Current methods to generate such graphs include set sampling and time sampling, but cold start effects and unrepresentative set selection impair accuracy. In this paper we present StatCache, a novel sample-based method that can perform data-locality analysis on realistic workloads. During the execution of an application, sparse discrete memory accesses are sampled, and their reuse distances are measured using a simple watchpoint mechanism. StatCache uses the information collected from a single run to accurately estimate miss ratios of fully-associative caches of arbitrary sizes and generates working-set graphs. We evaluate StatCache using the SPEC CPU2000 benchmarks and show that StatCache gives accurate results with a sampling rate as low as $10^{-4}$. We also provide a proof-of-concept implementation, and discuss potentially very fast implementation alternatives.} } @TechReport{ it:2003-057, author = {Erik Berg and Erik Hagersten}, title = {Low-Overhead Spatial and Temporal Data Locality Analysis}, institution = it, department = docs, year = 2003, month = dec, number = {2003-057}, abstract = {Performance is getting increasingly sensitive to cache behavior because of the growing gap between processor cycle time and memory latency. To improve performance, applications need to be optimized for data locality. 
Run-time analysis of spatial and temporal data locality can be used to facilitate this and should help both manual tuning and feedback-based compiler optimizations. Identifying cache behavior of individual data structures further enhances the optimization process. Current methods to perform such analysis include simulation combined with set sampling or time sampling, and hardware monitoring. Sampling often suffers from either poor accuracy or large run-time overhead, while hardware measurements have limited flexibility. We present DLTune, a prototype tool that performs spatial and temporal data-locality analysis in run time. It measures both spatial and temporal locality for the entire application and individual data structures in a single run, and effectively exposes poor data locality based on miss ratio estimates of fully-associative caches. The tool is based on an elaborate and novel sampling technique that allows all information to be collected in a single run with an overall sampling rate as low as one memory reference in ten million and an average slowdown below five on large workloads.} } @TechReport{ it:2003-056, author = {Parosh Aziz Abdulla and Johann Deneux and Pritha Mahata and Aletta Nyl{\'e}n}, title = {Forward Reachability Analysis of Timed Petri Nets}, institution = it, department = docs, year = 2003, number = {2003-056}, month = dec, abstract = {We consider verification of safety properties for concurrent real-timed systems modelled by timed Petri nets, by performing symbolic forward reachability analysis. We introduce a formalism, called \emph{region generators} for representing sets of markings of timed Petri nets. Region generators characterize downward closed sets of regions. Downward closed languages provide exact abstractions of sets of reachable states with respect to safety properties. We show that the standard operations needed for performing symbolic reachability analysis are computable for region generators. 
Since forward reachability analysis is necessarily incomplete, we introduce an acceleration technique to make the procedure terminate more often on practical examples. We have implemented a prototype for analyzing timed Petri nets and used it to verify a parameterized version of Fischer's protocol. We also used the tool to generate a finite-state abstraction of the protocol.} } @TechReport{ it:2003-055, author = {Martin Nilsson}, title = {Rapid Solution of Parameter-Dependent Linear Systems for Electromagnetic Problems in the Frequency Domain}, institution = it, year = 2003, number = {2003-055}, month = nov, abstract = {The Minimal Residual Interpolation method reduces the number of iterations in an iterative method for multiple right hand sides. It uses computed solutions to initialize an iterative solver with an accurate guess. This paper demonstrates the efficiency of the method for frequency sweeps and solving scattering problems by plane waves incident from multiple angles. A bound on the number of solutions required for plane wave scattering before the remaining solutions are obtained by Minimal Residual Interpolation only is given. We discuss the performance of the method compared to iterative seed techniques. In a numerical example a reduction factor of $60$ is obtained on the number of matrix vector multiplications.} } @TechReport{ it:2003-054, author = {Martin Nilsson}, title = {Stability of the Fast Multipole Method for {H}elmholtz Equation in Three Dimensions}, institution = it, department = tdb, year = 2003, number = {2003-054}, month = nov, abstract = {Stability limits for the diagonal forms approximating the free space Green's function in Helmholtz' equation are derived. It is shown that while the original approximation of the Green's function is stable except for overflows, the diagonalized form becomes unstable due to errors from roundoff, interpolation, choice of quadrature rule and approximation of the translation operator. 
Numerical experiments validate the theoretical estimates.} } @TechReport{ it:2003-053, author = {Erik B{\"a}ngtsson and Maya Neytcheva}, title = {Approaches to Reduce the Computational Cost when Solving Linear Systems of Equations Arising in Boundary Element Method Discretizations}, institution = it, department = tdb, year = 2003, number = {2003-053}, month = nov, abstract = {Preconditioned iterative solution methods are compared with the direct Gaussian elimination method to solve dense linear systems $A\mathbf{x} = \mathbf{b}$ which originate from crack propagation problems, modeled and discretized by boundary element (BEM) techniques. Numerical experiments are presented and compared with the direct solution method available in a commercial BEM package. The experiments show that the preconditioned iterative schemes are competitive compared to the direct solver with respect to both arithmetic operations required and memory demands.} } @TechReport{ it:2003-052, author = {Magnus Sv{\"a}rd and Jan Nordstr{\"o}m}, title = {Well Posed Boundary Conditions for the {N}avier-{S}tokes Equations}, institution = it, department = tdb, year = 2003, number = {2003-052}, month = nov, note = {Updated March 2004}, abstract = {In this article we propose a general procedure that allows us to determine both the number and type of boundary conditions for time dependent partial differential equations. With those, well posedness can be proven for a general initial-boundary value problem. The procedure is exemplified on the linearised Navier-Stokes equations in two and three space dimensions on a general domain.} } @TechReport{ it:2003-051, author = {Pavel Krcal and Wang Yi}, title = {Decidable and Undecidable Problems in Schedulability Analysis Using Timed Automata}, institution = it, department = docs, year = 2003, number = {2003-051}, month = nov, abstract = {We study schedulability problems of timed systems with non-uniformly recurring computation tasks. 
Assume a set of real time tasks whose best and worst execution times, and deadlines are known. We use timed automata to describe the arrival patterns (and release times) of tasks. From the literature, it is known that the schedulability problem for a large class of such systems is decidable and can be checked efficiently. In this paper, we provide a summary on what is decidable and what is undecidable in schedulability analysis using timed automata. Our main technical contribution is that the schedulability problem will be undecidable if these two conditions hold: (1) the execution times of tasks are intervals and (2) a task is allowed to reset clocks. We show that if one of the above two conditions is dropped, the problem will be decidable again. Thus our result can be used as an indication in identifying classes of timed systems that can be analysed efficiently.} } @TechReport{ it:2003-050, author = {Alexandre David}, title = {Hierarchical Modeling and Analysis of Timed Systems}, institution = it, department = docs, year = 2003, opttype = {PhD thesis}, number = {2003-050}, month = nov, note = {PhD thesis}, abstract = {\textsc{Uppaal} is a tool for model-checking real-time systems developed jointly by Uppsala University and Aalborg University. It has been applied successfully in case studies ranging from communication protocols to multimedia applications. The tool is designed to verify systems that can be modeled as networks of timed automata. But it lacks support for systems with hierarchical structures, which makes the construction of large models difficult. In this thesis we improve the efficiency of \textsc{Uppaal} with new data structures and extend its modeling language and its engine to support hierarchical constructs. To investigate the limits of \textsc{Uppaal}, we model and analyze an industrial fieldbus communication protocol. 
To our knowledge, this case study is the largest application \textsc{Uppaal} has been confronted with, and we managed to verify the models.
A new parallelization scheme which is a hybrid between previous methods is proposed. Several symmetries and other methods that reduce the storage requirement are proposed. The most important symmetry is for the translation operators. A method for reducing the CPU-time in the interpolation is also proposed.} } @TechReport{ it:2003-048, author = {Timour Katchaounov and Tore Risch}, title = {Interface Capabilities for Query Processing in Peer Mediator Systems}, institution = it, department = csd, year = 2003, number = {2003-048}, month = sep, abstract = {A peer mediator system (PMS) is a decentralized mediator system based on the P2P paradigm, where mediators integrate data sources and other mediators through views defined in a multi-mediator query language. In a PMS mediator peers compose views in terms of views in other peers - mediators and sources, or directly pose queries in the multi-mediator query language to some peer. All peers are fully autonomous and there is no central catalog or controller. Each peer in a PMS must provide an interface to its data and meta-data sufficient to allow the cooperative processing of queries by the PMS. We analyze the computational capabilities and meta-data that a software system has to export in order to participate as a peer in a PMS. For the analysis we identify and compare six classes of peer interfaces with increasing complexity. For each class we investigate the performance and scalability implications that result from the available capabilities and required meta-data. Our results are two-fold: \emph{i)} we provide guidelines for the design of mediator peers that can make best use of the interfaces provided by the data sources, and \emph{ii)} we analyze the tradeoffs in the design of inter-mediator interfaces so that mediator peers can efficiently cooperate to process queries against other composed mediators. 
Finally we describe the choices made in a concrete implementation of a PMS.} } @TechReport{ it:2003-047, author = {Jimmy Flink}, title = {Simuleringsmotor f{\"o}r t{\aa}gtrafik med st{\"o}d f{\"o}r experimentell konfiguration}, institution = it, department = hci, year = 2003, number = {2003-047}, month = sep, note = {M.Sc. thesis. In Swedish}, abstract = {A train simulator has been developed using Java and XML. The purpose is to use it as a tool in the process to build prototypes of future train driver interfaces. This work has been done within the LINS project at the division of human-computer interaction at Uppsala University. The LINS project aims to investigate issues about the train drivers' existing and future information environment. Prototypes of train driver interfaces is to be developed and one of the tasks is to study the impact of the train driver's situation awareness compared to the current ATC system and the upcoming ERTMS/ETCS. } } @TechReport{ it:2003-046, author = {Mathias Spjuth and Martin Karlsson and Erik Hagersten}, title = {The Elbow Cache: A Power-Efficient Alternative to Highly Associative Caches}, institution = it, department = docs, year = 2003, number = {2003-046}, month = sep, abstract = {Increasing the associativity is a common way to reduce the performance-detrimental conflicts in a cache. From a dynamic cache power perspective this associativity comes at a high cost. In this paper we present miss ratio performance and dynamic power estimates for a skewed cache and also for the organization proposed in this paper, the elbow cache. 
We will show that by extending a skewed cache organization with a relocation strategy we can obtain a miss ratio that is comparable to the miss ratio of an 8-way set-associative cache, while consuming up to 48\% less dynamic power.} } @TechReport{ it:2003-045, author = {Sven-Olof Nystr{\"o}m}, title = {A Polyvariant Type Analysis for {E}rlang}, institution = it, department = csd, year = 2003, number = {2003-045}, month = sep, abstract = {This paper presents a type analysis for the programming language Erlang. The analysis computes interprocedural control-flow and data-flow information, and should be applicable to any higher-order functional programming language with call-by-value semantics. The analysis uses a novel method for polyvariance, \emph{static limiting}, where an approximation of the call graph is analyzed to determine whether a function should be treated as polyvariant or monovariant. A general framework for polyvariant analysis is presented. This framework is used for experimental investigations to evaluate the cost and potential benefits of polyvariant analysis and to compare different approaches to polyvariant analysis. The experimental results show that static limiting gives the same or better precision as the other polyvariant analyses, while having more predictable analysis times. However, the experiments show only small improvements in precision for the various polyvariant analyses. } } @TechReport{ it:2003-044, author = {Dan Wallin and Henrik Johansson and Sverker Holmgren}, title = {Cache Memory Behavior of Advanced {PDE} Solvers}, institution = it, year = 2003, number = {2003-044}, month = aug, note = {A short version of this paper will appear in the proceedings of \emph{Parallel Computing 2003} (ParCo2003), Dresden, Germany}, abstract = {Three different partial differential equation (PDE) solver kernels are analyzed in respect to cache memory performance on a simulated shared memory computer. 
The kernels implement state-of-the-art solution algorithms for complex application problems, and the simulations are performed for data sets of realistic size. The performance of the studied applications benefits from much longer cache lines than normally found in commercially available computer systems. The reason for this is that numerical algorithms are carefully coded and have regular memory access patterns. These programs take advantage of spatial locality and the amount of false sharing is limited. A simple sequential hardware prefetch strategy, providing cache behavior similar to a large cache line, could potentially yield large performance gains for these applications. Unfortunately, such prefetchers often lead to additional address snoops in multiprocessor caches. However, applying a bundle technique, which lumps several read address transactions together, this large increase in address snoops can be avoided. For all studied algorithms, both the address snoops and cache misses are largely reduced in the bundled prefetch protocol.} } @TechReport{ it:2003-043, author = {Kajsa Ljungberg and Sverker Holmgren and {\"O}rjan Carlborg}, title = {Simultaneous Search for Multiple {QTL} Using the Global Optimization Algorithm {DIRECT}}, institution = it, department = tdb, year = 2003, number = {2003-043}, month = aug, abstract = {\textbf{Motivation:} Epistatic interactions are important for quantitative traits. To maximize the power to detect epistatic quantitative trait loci (QTLs), a simultaneous search is necessary. The computational complexity demands that the traditional exhaustive search be replaced by a more efficient global optimization algorithm. \textbf{Results:} We have adapted DIRECT, an algorithm presented in \cite{Jones93}, to the problem of simultaneous mapping of two and three QTL. 
We have compared DIRECT, in terms of accuracy and speed analyzing real data sets, with standard exhaustive search and a genetic algorithm previously used for QTL mapping in two dimensions. In all two- and three-QTL test cases, DIRECT accurately finds the global optimum two to four orders of magnitude faster than when using an exhaustive search, and one order of magnitude faster than when using the genetic algorithm. A search using a model with three fully interacting QTL is finished in six CPU minutes when using DIRECT, while an exhaustive search takes 142 CPU days. Thus three-QTL randomization testing for determining empirical significance thresholds is made feasible by the use of DIRECT. This opens the possibility to thoroughly investigate the power of simultaneous search to detect at least three interacting QTL. \textbf{Availability:} The source code of the prototype implementation is available at \url{http://www.tdb.uu.se/~kl/qtl_software.html}. \textbf{Contact:} kl@tdb.uu.se} } @TechReport{ it:2003-042, author = {Gerardo Schneider}, title = {Invariance Kernels of Polygonal Differential Inclusions}, institution = it, department = docs, year = 2003, number = {2003-042}, month = aug, note = {Updated October 2003}, abstract = {Polygonal hybrid systems are a subclass of planar hybrid automata which can be represented by piecewise constant differential inclusions (SPDIs). Here, we identify and compute an important object of SPDIs' phase portrait, namely \emph{invariance kernels}. An \emph{invariant set} is a set of initial points of trajectories which keep rotating in a cycle forever and the \emph{invariance kernel} is the largest of such sets. 
We show that this kernel is a non-convex polygon and we give a non-iterative algorithm for computing the coordinates of its vertices and edges.} } @TechReport{ it:2003-041, author = {Wendy Kress}, title = {A Compact Fourth Order Time Discretization Method for the Wave Equation}, institution = it, department = tdb, year = 2003, number = {2003-041}, month = aug, abstract = {A fourth order accurate discretization in time and space for the wave equation in first order system formulation is investigated. The unconditional stability of the scheme is established and the performance of the scheme is compared to a second order accurate scheme for a smooth one dimensional problem and it is also applied to a two dimensional problem with piecewise constant coefficients. } } @TechReport{ it:2003-040, author = {Wendy Kress}, title = {Error Estimates for Deferred Correction Methods in Time}, institution = it, department = tdb, year = 2003, number = {2003-040}, month = aug, abstract = {In this paper, we consider the deferred correction principle for high order accurate time discretization of partial differential equations (PDEs) and ordinary differential equations (ODEs). Deferred correction is based on a lower order method, here we use second order accurate A-stable methods. Solutions of higher order accuracy are computed successively. The computational complexity for calculating higher order solutions is comparable to the complexity of the lower order method. There is no stability restraint on the size of the time-step. Error estimates are derived and the application of the schemes to initial boundary value problems is discussed in detail. 
The theoretical results are supported by a series of numerical experiments.} } @TechReport{ it:2003-039, author = {Therese Berg and Bengt Jonsson and Martin Leucker and Mayank Saksena}, title = {Insights to Angluin's Learning}, institution = it, department = docs, year = 2003, number = {2003-039}, month = aug, abstract = {Among other domains, learning finite-state machines is important for obtaining a model of a system under development, so that powerful formal methods such as model checking can be applied. A prominent algorithm for learning such devices was developed by Angluin. We have implemented this algorithm in a straightforward way to gain further insights to practical applicability. Furthermore, we have analyzed its performance on randomly generated as well as real-world examples. Our experiments focus on the impact of the alphabet size and the number of states on the needed number of membership queries. Additionally, we have implemented and analyzed an optimized version for learning prefix-closed regular languages. Memory consumption is one major obstacle when we attempted to learn large examples. We see that prefix-closed languages are relatively hard to learn compared to arbitrary regular languages. The optimization, however, shows positive results. } } @TechReport{ it:2003-038, author = {Henrik Bj{\"o}rklund and Sven Sandberg and Sergei Vorobyov}, title = {On Fixed-Parameter Complexity of Infinite Games}, institution = it, department = csd, year = 2003, number = {2003-038}, month = aug, abstract = {We investigate and classify fixed parameter complexity of several infinite duration games, including Rabin, Streett, Muller, parity, mean payoff, and simple stochastic, using different natural parameterizations. Most known fixed parameter intractable games are PSPACE- or EXP-complete classically, AW[*] or XP-hard parametrically, and are all finite duration games. 
In contrast, the games we consider are infinite duration, solvable in positional or finite memory strategies, and belong to ``lower'' complexity classes, like NP and/or coNP. However, the best known algorithms they possess are of complexity $n^{f(k)}$, i.e., XP is the only upper bound, with no known parametric lower bounds. We demonstrate that under different parameterizations these games may have different or equivalent FPT-statuses, and present several tractable and intractable cases.} } @TechReport{ it:2003-037, author = {Dan Wallin and Erik Hagersten}, title = {Bundling: Reducing the Overhead of Multiprocessor Prefetchers}, institution = it, department = docs, year = 2003, number = {2003-037}, month = aug, abstract = {Prefetching has proven useful for reducing cache misses in multiprocessors at the cost of increased coherence traffic. This is especially troublesome for snooping-based systems, where the available coherence bandwidth often is the scalability bottleneck. The new bundling technique, introduced in this paper, reduces the overhead caused by prefetching by two means: piggybacking prefetches with normal requests; and, requiring only one device on the ``bus'' to perform a snoop lookup for each prefetch transaction. This paper describes bundling implementations for three important transaction types: reads, upgrades and downgrades. While bundling could reduce the overhead of most existing prefetch schemes, the evaluation of bundling performed in this paper has been limited to two of them: sequential prefetching and Dahlgren's adaptive sequential prefetching. Both schemes have their snoop bandwidth cut in about half for all the commercial and scientific benchmarks studied. The combined effect of bundling applied to these fairly naive prefetch schemes lowers the cache miss rate, the address bandwidth, as well as the snoop bandwidth compared with no prefetching for all applications - a result never demonstrated before. 
Bundling, however, will not reduce the data bandwidth introduced by a prefetch scheme. We argue, however, that the data bandwidth is more easily scaled than the snoop bandwidth for snoop-based coherence systems.} } @TechReport{ it:2003-036, author = {Emad Abd-Elrady and Torsten S{\"o}derstr{\"o}m and Torbj{\"o}rn Wigren}, title = {Periodic Signal Modeling Based on Li{\'e}nard's Equation}, institution = it, department = syscon, year = 2003, number = {2003-036}, month = aug, abstract = {The problem of modeling periodic signals is considered. The approach taken here is motivated by the well known theoretical results on the existence of periodic orbits for Li{\'e}nard systems and previous results on modeling periodic signals by means of second order nonlinear ordinary differential equations (ODEs). The approach makes use of the appropriate conditions imposed on the polynomials of a Li{\'e}nard system to guarantee the existence of a unique and stable limit cycle. These conditions reduce the number of parameters required to generate accurate models for periodic signals.} } @TechReport{ it:2003-035, author = {Elisabeth Larsson and Bengt Fornberg}, title = {Theoretical and Computational Aspects of Multivariate Interpolation with Increasingly Flat Radial Basis Functions}, institution = it, department = tdb, year = 2003, number = {2003-035}, month = jun, abstract = {Multivariate interpolation of smooth data using smooth radial basis functions is considered. The behavior of the interpolants in the limit of nearly flat radial basis functions is studied both theoretically and numerically. Explicit criteria for different types of limits are given. Using the results for the limits, the dependence of the error on the shape parameter of the radial basis function is investigated. 
The mechanisms that determine the optimal shape parameter value are studied and explained through approximate expansions of the interpolation error.} } @TechReport{ it:2003-034, author = {Per L{\"o}tstedt and Alison Ramage and von Sydow, Lina and Stefan S{\"o}derberg}, title = {Preconditioned Implicit Solution of Linear Hyperbolic Equations with Adaptivity}, institution = it, department = tdb, year = 2003, number = {2003-034}, month = may, abstract = {This paper describes a method for solving hyperbolic partial differential equations using an adaptive grid: the spatial derivatives are discretised with a finite volume method on a grid which is structured and partitioned into blocks which may be refined and derefined as the solution evolves. The solution is advanced in time via a backward differentiation formula. The discretisation used is second order accurate and stable on Cartesian grids. The resulting system of linear equations is solved by GMRES at every time-step with the convergence of the iteration being accelerated by a semi-Toeplitz preconditioner. The efficiency of this preconditioning technique is analysed and numerical experiments are presented which illustrate the behaviour of the method on a parallel computer. } } @TechReport{ it:2003-033, author = {Daniel Noreland}, title = {A Gradient Based Optimisation Algorithm for the Design of Brass-Wind Instruments}, institution = it, department = tdb, year = 2003, number = {2003-033}, month = may, abstract = {This paper presents how the shape of a brass instrument can be optimised with respect to its intonation and impedance peak magnitudes. The instrument is modelled using a one-dimensional transmission line analogy with truncated cones. The optimisation employs the Levenberg-Marquardt method, with the gradient of the objective function obtained by analytic manipulation. 
Through the use of an appropriate choice of design variables, the optimisation is capable of rapidly finding smooth horn profiles.} } @TechReport{ it:2003-032, author = {Daniel Noreland}, title = {Impedance Boundary Conditions for Acoustic Waves in a Duct with a Step Discontinuity}, institution = it, department = tdb, year = 2003, number = {2003-032}, month = may, abstract = {This paper treats the use of numerically computed impedance boundary conditions for acoustic simulations. Such boundary conditions may be used to combine different methods on different parts of the computational domain. Impedance boundary conditions may be computed for each subproblem independently of each other. In order to develop insight into this approach, wave propagation in a rectangular waveguide with a step discontinuity is studied.} } @TechReport{ it:2003-031, author = {Daniel Noreland}, title = {Ill-Posedness of Absorbing Boundary Conditions Applied on Convex Surfaces}, institution = it, department = tdb, year = 2003, number = {2003-031}, month = may, abstract = {Absorbing boundary conditions are important in many applications where partial differential equations defined on infinite domains are solved numerically. A problem that has attracted interest recently is that perfectly matched layers layers (PML) for electro-magnetic FDTD simulations applied on convex surfaces may lead to instabilities. This paper shows that these problems are not restricted to electro-magnetic calculations, but common for problems described by the classical wave equation with absorbing boundary conditions on convex surfaces. It is shown that the instabilities are independent of the numerical implementation of the absorbing boundary condition, and instead a result of unphysical assumption in the formulation of the boundary condition. } } @TechReport{ it:2003-030, author = {M. Ekman and P. Samuelsson and B. 
Carlsson}, title = {Adaptive Control of the Nitrate Level in an Activated Sludge Process Using an External Carbon Source - Extended Version}, institution = it, department = syscon, year = 2003, number = {2003-030}, month = may, note = {A shorter version is also available in: Preprints of Reglerm{\"o}te 2002, Link{\"o}ping, Sweden, May 29--30 2002, pp 308--313, and another version in: Proc. of the 1st World Water Congress of the IWA. Melbourne, 2002 (poster presentation). Updated July 2003}, abstract = {In an activated sludge process for nitrogen removal, nitrate may be reduced to nitrogen gas by facultative heterotrophic bacteria in an anoxic environment. In order to guarantee sufficient supplies of readily biodegradable carbon compounds, an external carbon source often needs to be added. In this report, we present an automatic control strategy for controlling the nitrate level using an external carbon source. The external carbon source is added in the first anoxic compartment and the nitrate concentration in the last anoxic compartment is controlled. Key process parameters of a simplified Activated Sludge Model No. 1 are estimated on-line. The estimated parameters are used for updating the parameters of a linear quadratic controller. The strategy is illustrated in a simulation study using realistic influent data and is shown to perform well.} } @TechReport{ it:2003-029, author = {Oskar Grenholm and Zoran Radovic and Erik Hagersten}, title = {Latency-hiding and Optimizations of the {DSZOOM} Instrumentation System}, institution = it, department = docs, year = 2003, number = {2003-029}, month = may, abstract = {An efficient and robust instrumentation tool (or compiler support) is necessary for an efficient implementation of fine-grain software-based shared memory systems (SW-DSMs). 
The DSZOOM system, developed by the Uppsala Architecture Research Team (UART) at Uppsala University, is a sequentially consistent fine-grained SW-DSM originally developed using Executable Editing Library (EEL)---a binary modification tool from University of Wisconsin-Madison. In this paper, we identify several weaknesses of this original approach and present a new and simple tool for assembler instrumentation: Sparc Assembler Instrumentation Tool (SAIT). This tool can instrument (modify) a highly optimized assembler output from the compiler for the newest UltraSPARC processors. Currently, the focus of the tool is load-, store-, and load-store-instrumentation. By using the SAIT, we develop and present several low-level instrumentation optimization techniques that significantly improve the performance of the original DSZOOM system. One of the presented techniques is a write permission cache (WPC), a latency-hiding mechanism for memory-store operations, that can lower the instrumentation overheads for some applications (as much as 45\% for LU-cont, running on two nodes with 8 processors each). Finally, we demonstrate that this new DSZOOM system executes faster than the old one for all 13 applications studied, from the SPLASH-2 benchmark suite. Execution time improvement factors range from 1.07 to 2.82 (average 1.73). } } @TechReport{ it:2003-028, author = {Erik K. Larsson}, title = {Limiting Properties of Sampled Stochastic Systems}, institution = it, department = syscon, year = 2003, number = {2003-028}, month = may, abstract = {The objective of this paper is to present some general properties of discrete-time systems originating from fast sampled continuous-time stochastic systems. In particular, some results concerning the zero locations and the innovations variance of fast sampled continuous-time autoregressive moving average (ARMA) systems will be stated. 
Knowledge of these properties is of importance and interest in various fast sampling applications, such as discrete-time simulation of continuous-time systems and identification of continuous-time systems using discrete-time measurements. The main contribution, however, is to provide some insight into questions like: when can simple approximate sampling schemes be applied accurately enough and what determines the characteristic dynamic of a fast sampled system. The results are illustrated by an extensive set of examples. } } @TechReport{ it:2003-027, author = {Per Carlsson and Arne Andersson and Fredrik Ygge}, title = {A Tractable Mechanism for Time Dependent Markets}, institution = it, department = csd, year = 2003, number = {2003-027}, month = apr, note = {Updated October 2004 with new complexity results}, abstract = {Markets with time dependent goods are special cases of multi commodity markets. An application area of high interest is day-ahead power markets. If these are to be opened for consumer side bidders and local production bidders, the number of actors on the market grows dramatically, and new market mechanisms and algorithms are needed. Another interesting application area with many similarities is bandwidth markets. The design of large flexible markets with time dependent goods is a computational challenge. In this paper we present a computationally tractable mechanism for time dependent markets. By a number of predefined bid types, it offers useful flexibility to the bidders. 
We present the market mechanism and the corresponding matching algorithm together with some analysis of its behaviour.} } @TechReport{ it:2003-026, author = {Henrik L{\"o}f and Zoran Radovic and Erik Hagersten}, title = {{THROOM} --- Running {POSIX} Multithreaded Binaries on a Cluster}, institution = it, department = docs, year = 2003, number = {2003-026}, month = apr, abstract = {Most software distributed shared memory systems (SW-DSMs) lack industry standard interfaces that limit their applicability to a small set of shared-memory applications. In order to gain general acceptance, SW-DSMs should support the same look-and-feel of shared memory as hardware DSMs. This paper presents a runtime system concept that enables unmodified POSIX P1003.1c (Pthreads) compliant binaries to run transparently on clustered hardware. The key idea is to extend the single process model of multi-threading to a multi-process model where threads are distributed to processes executing in remote nodes. The distributed threads execute in a global shared address space made coherent by a fine-grain SW-DSM layer. We also present THROOM, a proof-of-concept implementation that runs unmodified Pthread binaries on a virtual cluster modeled as standard UNIX processes. THROOM runs on top of the DSZOOM fine-grain SW-DSM system with limited OS support.} } @TechReport{ it:2003-025, author = {Torbj{\"o}rn Wigren and Torsten S{\"o}derstr{\"o}m}, title = {Second Order {ODE}s are Sufficient for Modeling of Many Periodic Signals}, institution = it, department = syscon, year = 2003, number = {2003-025}, month = apr, abstract = {Which is the minimum order an autonomous nonlinear ordinary differential equation (ODE) needs to have to be able to model a periodic signal? This question is motivated by recent research on periodic signal analysis, where nonlinear ODEs are used as models. The results presented here show that an order of two of the ODE is sufficient for a large class of periodic signals. 
More precisely, conditions on a periodic signal are established that imply the existence of an ODE that has the periodic signal as a solution. A criterion that characterizes the above class of periodic signals by means of the overtone contents of the signals is also presented. The reason why higher order ODEs are sometimes needed is illustrated with geometric arguments. Extensions of the theoretical analysis to cases with orders higher than two are developed using this insight.} } @TechReport{ it:2003-024, author = {Parosh Aziz Abdulla and Bengt Jonsson and Marcus Nilsson and Julien d'Orso}, title = {Algorithmic Improvements in Regular Model Checking}, institution = it, department = docs, year = 2003, number = {2003-024}, month = apr, note = {Extended version of paper accepted for publication in in CAV'2003}, abstract = {Regular model checking is a form of symbolic model checking for parameterized and infinite-state systems, whose states can be represented as finite strings of arbitrary length over a finite alphabet, in which regular sets of words are used to represent sets of states. In earlier papers, we have developed methods for computing the transitive closure (or the set of reachable states) of the transition relation, represented by a regular length-preserving transducer. In this paper, we present several improvements of these techniques, which reduce the size of intermediate approximations of the transitive closure: One improvement is to pre-process the transducer by {\em bi-determinization}, another is to use a more powerful equivalence relation for identifying histories (columns) of states in the transitive closure. We also present a simplified theoretical framework for showing soundness of the optimization, which is based on commuting simulations. 
The techniques have been implemented, and we report the speedups obtained from the respective optimizations.} } @TechReport{ it:2003-023, author = {Bertil Gustafsson and Per Wahlund}, title = {Time Compact Difference Methods for Wave Propagation in Discontinuous Media}, institution = it, department = tdb, year = 2003, number = {2003-023}, month = apr, abstract = {In an earlier paper by Gustafsson and Mossberg, a fourth order one-step method was constructed for the solution of wave propagation problems. The method is based on the first order system form of the PDE, and uses a staggered grid both in space and time. The method was also applied with good results to a problem with discontinuous coefficients without using any special procedure across the discontinuity. In this paper we will analyze a model problem from acoustics, and demonstrate the theoretical foundation for this behavior. Furthermore, we shall present more detailed numerical experiments which confirm the theoretical results.} } @TechReport{ it:2003-022, author = {Bharath Bhikkaji and Kaushik Mahata and Torsten S{\"o}derstr{\"o}m}, title = {Reduced order models for a two-dimensional heat diffusion system}, institution = it, department = syscon, year = 2003, number = {2003-022}, month = apr, abstract = {In this paper, a two-dimensional heat diffusion system, which is modeled by a partial differential equation (PDE) is considered. Finite order approximations, for the infinite order PDE model, are constructed first by a direct application of the standard finite difference approximation (FD) scheme. Using tools of linear algebra, the constructed FD approximate models are reduced to computationally simpler models without any loss of accuracy. Further, the reduced approximate models are modified by replacing its poles with their respective asymptotic limits. 
Numerical experiments suggest that the proposed modifications improve the accuracy of the approximate models.} } @TechReport{ it:2003-021, author = {Christian Tschudin and Richard Gold}, title = {{LUNAR}: Lightweight Underlay Network Ad-hoc Routing}, institution = it, department = docs, year = 2003, number = {2003-021}, month = apr, note = {Originally written January 2002; updated April 2002}, abstract = {In this paper we present a new ad hoc routing system based upon simple principles regarding the routing strategy and the implementation approach. In the routing area we (re-)introduce the end-to-end principle, letting the communicating end nodes make the decisions concerning the behaviour of intermediate nodes. We adopt a routing strategy that is a mixture of on-demand and pro-active routing in order to minimize the possible down-times of communication paths. Implementation-wise we use explicit ``resolution commands'' sent to neighbour nodes to provide LUNAR functionality. A freely available implementation has been produced that includes auto-configuration of IP network addresses and default gateway routing, making LUNAR a fully self-configuring ad-hoc routing solution which supports both unicast and broadcast styles of communication.} } @TechReport{ it:2003-020, author = {Christian Tschudin and Richard Gold}, title = {{SelNet}: A Translating Underlay Network}, institution = it, department = docs, year = 2003, number = {2003-020}, month = apr, note = {Originally written October 2001}, abstract = {The Internet has successfully promoted address uniformity and a node centric forwarding semantics. However, NAT and wireless networks among others have shown the advantage and the need of revising basic assumptions of the Internet model. 
In this paper we review several of these basic networking concepts and introduce a new set of network abstractions like ``membranes'' which are individual physical or virtual networks and ``wormholes'' which link one or more membranes together. This leads us to an active network architecture called SelNet that is based on tunnelling and translation mechanisms. Besides the architecture we present several network services and abstractions that can be built on top of it. A brief status report on a prototype implementation is also provided.} } @TechReport{ it:2003-019, author = {Henrik Bj{\"o}rklund and Sven Sandberg and Sergei Vorobyov}, title = {Randomized Subexponential Algorithms for Parity Games}, institution = it, department = csd, year = 2003, number = {2003-019}, month = apr, abstract = {The complexity of solving parity games is an important open problem in verification, automata theory, and complexity theory. In this paper we develop an abstract setting for studying parity games and related problems, based on function optimization over certain discrete structures. We introduce new classes of \emph{completely local-global (CLG)} and \emph{recursively local-global (RLG)} functions, and show that strategy evaluation functions for parity games belong to these classes. We also establish a relation to the previously well-studied \emph{completely unimodal (CU)} and \emph{local-global} functions. A number of nice properties of CLG-functions are proved. In this setting, we survey several randomized optimization algorithms appropriate for CU-, CLG-, and RLG-functions. We show that the subexponential algorithms for linear programming by Kalai and Matou\v{s}ek, Sharir, and Welzl, can be adapted to optimizing the functions we study, with preserved subexponential expected running time. 
We examine the relations to two other abstract frameworks for subexponential optimization, the LP-type problems of Matou{\v{s}}ek, Sharir, and Welzl, and the abstract optimization problems of G{\"a}rtner. The applicability of our abstract optimization approach to parity games builds upon a discrete strategy evaluation measure. We also consider local search type algorithms, and settle two nontrivial, but still exponential, upper bounds. As applications we address some complexity-theoretic issues including non-PLS-completeness of the problems studied.} } @TechReport{ it:2003-018, author = {Magnus Sv{\"a}rd and Ken Mattsson and Jan Nordstr{\"o}m}, title = {Steady State Computations Using Summation-by-Parts Operators}, institution = it, department = tdb, year = 2003, number = {2003-018}, month = mar, abstract = {This paper concerns energy stability on curvilinear grids and its impact on steady state calculations. We have done computations for the Euler equations using both high order summation-by-parts block and diagonal norm schemes. The calculations indicate the significance of energy stability in order to obtain convergence to steady state. Furthermore, the difference operators are improved such that faster convergence to steady state is obtained. The numerical experiments also reveal the importance of high quality grids when high order finite difference methods are used. } } @TechReport{ it:2003-017, author = {Henrik Bj{\"o}rklund and Sven Sandberg and Sergei Vorobyov}, title = {An Improved Subexponential Algorithm for Parity Games}, institution = it, department = csd, year = 2003, number = {2003-017}, month = mar, abstract = {We suggest a new algorithm for deciding parity games, a fundamental problem of unknown complexity (in \textsc{NP} $\cap$ \textsc{coNP}, not known to belong to \textsc{P}) in game, automata, complexity theories, combinatorial optimization, temporal logic of programs, computer-aided verification. 
The novelty of the algorithm consists in exploiting a special form of parity games with \emph{retreats}, where optimal retreat edges define \emph{absorbing facets} (with better values than their neighbors on complementary facets) in the strategy space. A superset of such absorbing facets can be found by standard random iterative improvement algorithms in expected polynomial time. Additional dual techniques are used to minimize this superset. As a result, the dimension of the problem shrinks, to which we finally apply the Kalai-Matou\v{s}ek-Sharir-Welzl-Ludwig-style randomization techniques we recently adapted for games [Bjorklund, Sandberg, Vorobyov, STACS'2003 and TR-2003-002] } } @TechReport{ it:2003-016, author = {Bertil Gustafsson and Eva Mossberg}, title = {Time Compact High Order Difference Methods for Wave Propagation}, institution = it, department = tdb, year = 2003, number = {2003-016}, month = mar, abstract = {We construct one-step explicit difference methods for solution of wave propagation problems with fourth order accuracy in both space and time by using a principle that can be generalized to arbitrary order. We use the first order system form and a staggered grid. The fourth order accuracy in time is obtained by transferring time derivatives in the truncation error to space derivatives. Discontinuous coefficients corresponding to interfaces between different materials are considered as a special case of variable coefficients, and the method is applied across the discontinuities. The accuracy is much improved compared to second order methods even for this type of problems. 
A certain norm is shown to be conserved, ensuring good accuracy even for long time integration.} } @TechReport{ it:2003-015, author = {Henrik Bj{\"o}rklund and Sven Sandberg}, title = {Algorithms for Combinatorial Optimization and Games Adapted from Linear Programming}, institution = it, department = csd, year = 2003, number = {2003-015}, month = mar, abstract = {The problem of maximizing functions from the boolean hypercube to real numbers arises naturally in a wide range of applications. This paper studies an even more general setting, in which the function to maximize is defined on what we call a hyperstructure. A hyperstructure is the Cartesian product of finite sets with possibly more than two elements. We also relax the codomain to any partially ordered set. Well-behaved such functions arise in game theoretic contexts, in particular from parity games (equivalent to the modal mu-calculus model checking) and simple stochastic games (Bj{\"o}rklund, Sandberg, Vorobyov 2003). We show how several subexponential algorithms for linear programming (Kalai 1992, Matousek, Sharir, Welzl 1992) can be adapted to hyperstructures and give a reduction to the abstract optimization problems introduced in (G{\"a}rtner 1995).} } @TechReport{ it:2003-014, author = {Brahim Hnich and Julian Richardson and Pierre Flener}, title = {Towards Automatic Generation and Evaluation of Implied Constraints}, institution = it, department = csd, year = 2003, number = {2003-014}, month = mar, abstract = {Adding appropriate implied constraints can dramatically improve the computational behaviour of a constraint solver on the augmented set of constraints. We briefly document experiments we made with PRESS and clp(q,r) for generating implied constraints. 
This report is an excerpt (namely Sections 5.1, 5.2, and 5.3, which are included verbatim apart from minor cosmetic, non-technical corrections) from an unfinished technical report written in August 2000, and thus (finally) provides a citable, published reference to that work.} } @TechReport{ it:2003-013, author = {Ken Mattsson and Magnus Sv{\"a}rd and Jan Nordstr{\"o}m}, title = {Stable Artificial Dissipation Operators for High Order Finite Difference Schemes}, institution = it, department = tdb, year = 2003, number = {2003-013}, month = feb, abstract = {Stability for nonlinear convection problems using centered difference schemes require the addition of artificial dissipation. In this paper we present dissipation operators that preserve both stability and accuracy for high order finite difference approximations of initial boundary value problems.} } @TechReport{ it:2003-012, author = {Ken Mattsson and Jan Nordstr{\"o}m}, title = {Finite Difference Approximations of Second Derivatives on Summation by Parts Form}, institution = it, department = tdb, year = 2003, number = {2003-012}, month = feb, note = {Updated 2003-03-21}, abstract = {Finite difference approximations of second derivatives, which satisfy a summation by parts rule have been derived for the 4th, 6th and 8th order case, by using the symbolic mathematics software Maple. The operators are based on the same norms as the corresponding approximations of the first derivate, which make the construction of stable approximations to general parabolic problems straightforward. The error analysis show that the second derivative approximation can be closed at the boundaries with an approximation two orders less accurate than the internal scheme, and still preserve the internal accuracy. However, if an energy estimate cannot be obtained, the overall convergence rate is reduced by one order. } } @TechReport{ it:2003-011, author = {Alexandre David and Gerd Behrmann and Kim G. 
Larsen and Wang Yi}, title = {A Tool Architecture for the Next Generation of UPPAAL}, institution = it, department = docs, year = 2003, number = {2003-011}, month = feb, abstract = {We present the design of the model-checking engine and internal data structures for the next generation of UPPAAL. The design is based on a pipeline architecture where each stage represents one independent operation in the verification algorithms. The architecture is based on essentially one shared data structure to reduce redundant computations in state exploration, which unifies the so-called passed and waiting lists of the traditional reachability algorithm. In the implementation, instead of using standard memory management functions from general-purpose operating systems, we have developed a special-purpose storage manager to best utilize sharing in physical storage. We present experimental results supporting these design decisions. It is demonstrated that the new design and implementation improves the efficiency of the current distributed version of UPPAAL by about 60\% in time and 80\% in space.} } @TechReport{ it:2003-010, author = {Alexandre David and Johann Deneux and Julien d'Orso}, title = {A Formal Semantics for {UML} Statecharts}, institution = it, department = docs, year = 2003, number = {2003-010}, month = feb, abstract = {The UML language is a large set of notations and rules to describe different aspects of a system. It provides a set of diagrams to view the system from different angles: use case diagrams, class diagrams, statecharts diagrams, and deployment diagrams are some of them. In this report we are interested in the statecharts diagrams that describe dynamic behaviours. We give a formal semantics for a large subset of these statecharts, in particular we focus on the action language semantics. Our subset and semantics are very close to the one supported by the tool Rhapsody.} } @TechReport{ it:2003-009, author = {Alexandre David and M. 
Oliver M{\"o}ller and Wang Yi}, title = {Verification of {UML} Statecharts with Real-Time Extensions}, institution = it, department = docs, year = 2003, number = {2003-009}, month = feb, abstract = {We develop a formal model for hierarchical timed systems. The statechart-like hierarchy features parallelism on any level and connects superstate and substate via explicit entries and exits. Time is represented by clocks, invariants, and guards. For this formalism we give an operational semantics that is appropriate for the verification of universal timed computation tree logic (TCTL) properties. Our model is strongly related to the timed automata dialect as present in the model checking tool UPPAAL. Here networks of timed automata are enriched with shared variables, hand-shake synchronization, and urgency. We describe a flattening procedure that translates our formalism into a network of UPPAAL timed automata. This flattening preserves a correspondence of the sets of legal traces. Therefore the translation can be used to establish properties in the hierarchical model. As a case study, we use the standard UML modeling example of a cardiac pacemaker. We model it in our hierarchical language, flatten it to UPPAAL input, and use the latter for a formal analysis. Our formalism remains decidable with respect to TCTL properties. In general the encoding of statecharts requires an abstraction step, which is not covered by this article. } } @TechReport{ it:2003-008, author = {Julian Richardson and Pierre Flener}, title = {Program Schemas as Proof Methods}, institution = it, department = csd, year = 2003, number = {2003-008}, month = feb, abstract = {Automatic proof and automatic programming have always enjoyed a close relationship. We present a unification of proof planning (a knowledge-based approach to automated proof) and schema-guided synthesis (a knowledge-based approach to automatic programming). 
This unification enhances schema-guided synthesis with features of proof planning, such as the use of heuristics and the separation between object-level and meta-level reasoning. It enhances proof planning with features of schema-guided synthesis, such as reuse. It allows program schemas and heuristics to be implemented as proof planning methods. We aim particularly at implementation within the lambda-Clam proof planner, whose higher-order features are particularly appropriate for synthesis. Program synthesis and satisfaction of its proof obligations, such as verification conditions, matchings, or simplifications, are thus neatly integrated.} } @TechReport{ it:2003-007, author = {Henrik Brand{\'e}n and Sverker Holmgren and Per Sundqvist}, title = {Discrete Fundamental Solution Preconditioning for Hyperbolic Systems of {PDE}}, institution = it, department = tdb, year = 2003, number = {2003-007}, month = feb, abstract = {We present a new preconditioner for the iterative solution of linear systems of equations arising from discretizations of systems of first order partial differential equations (PDEs) on structured grids. Such systems occur in many important applications, including compressible fluid flow and electromagnetic wave propagation. The preconditioner is a truncated convolution operator, with a kernel that is a fundamental solution of a difference operator closely related to the original discretization. Analysis of a relevant scalar model problem in two spatial dimensions shows that grid independent convergence is obtained using a simple one-stage iterative method. As an example of a more involved problem, we consider the steady state solution of the non-linear Euler equations in a two dimensional, non-axisymmetric duct. 
We present results from numerical experiments, verifying that the preconditioning technique again achieves grid independent convergence, both for an upwind discretization and for a centered second order discretization with fourth order artificial viscosity. } } @TechReport{ it:2003-006, author = {Henrik Brand{\'e}n and Per Sundqvist}, title = {An Algorithm for Computing Fundamental Solutions of Difference Operators}, institution = it, department = tdb, year = 2003, number = {2003-006}, month = feb, abstract = {We propose an FFT-based algorithm for computing fundamental solutions of difference operators with constant coefficients. Our main contribution is to handle cases where the symbol has zeros.} } @TechReport{ it:2003-005, author = {Samuel Sundberg}, title = {Solving the linearized Navier-Stokes equations using semi-Toeplitz preconditioning}, institution = it, department = tdb, year = 2003, number = {2003-005}, month = feb, abstract = {A semi-Toeplitz preconditioner for the linearized Navier--Stokes equation for compressible flow is proposed and tested. The preconditioner is applied to the linear system of equations to be solved in each time step of an implicit method. The equations are solved with flat plate boundary conditions and are linearized around the Blasius solution. The grids are stretched in the normal direction to the plate and the quotient between the time step and the space step is varied. The preconditioner works well in all tested cases and outperforms the method without preconditioning both in number of iterations and execution time.} } @TechReport{ it:2003-004, author = {Kaushik Mahata and Torsten S{\"o}derstr{\"o}m}, title = {Subspace estimation of real-valued sine wave frequencies}, institution = it, department = syscon, year = 2003, number = {2003-004}, month = jan, abstract = {Subspace based estimation of multiple real-valued sine wave frequencies is considered in this paper. A novel data covariance model is proposed. 
In the proposed model the dimension of the signal subspace equals the number of frequencies present in the data, which is half of the signal subspace dimension for the conventional model. Consequently, an ESPRIT like algorithm using the proposed data model is presented. The proposed algorithm is then extended for the case of complex-valued sine waves. Performance analysis of the proposed algorithms are also carried out. The algorithms are tested in numerical simulations. The newly proposed algorithm outperforms ESPRIT by a significant margin and is computationally efficient as well. } } @TechReport{ it:2003-003, author = {Magnus Sv{\"a}rd and Jan Nordstr{\"o}m}, title = {A Stable and Accurate Summation-by-Parts Finite Volume Formulation of the Laplacian Operator}, institution = it, department = tdb, year = 2003, number = {2003-003}, month = jan, abstract = {Our objective is to analyse a commonly used edge based finite volume approximation of the Laplacian and construct an accurate and stable way to implement boundary conditions. Of particular interest are general unstructured grids where the strength of the finite volume method is fully utilised. As a model problem we consider the heat equation. We analyse the Cauchy problem in one and several space dimensions and we prove stability on unstructured grids. Next, the initial-boundary value problem is considered and a scheme is constructed in a summation-by-parts framework. The boundary conditions are imposed in a stable and accurate manner, using a penalty formulation. Numerical computations of the wave equation in two-dimensions are performed, verifying stability and order of accuracy for structured grids. However, the results are not satisfying for unstructured grids. Further investigation reveals that the approximation is not consistent for general unstructured grids. However, grids consisting of equilateral polygons recover the convergence. 
} } @TechReport{ it:2003-002, author = {Henrik Bj{\"o}rklund and Sven Sandberg and Sergei Vorobyov}, title = {On Combinatorial Structure and Algorithms for Parity Games}, institution = it, department = csd, year = 2003, number = {2003-002}, month = jan, abstract = {In this paper we identify and systematically explore the combinatorial structure underlying parity and simple stochastic games. We introduce the class of Completely LG (local-global) functions with nice structural properties pertinent to games and allowing for efficient optimization by iterative improvement local search style algorithms. We demonstrate several important combinatorial properties of Completely LG functions, allowing for many optimization algorithms, and establish a relation with the subclass of Completely Unimodal functions, studied by Hammer et al. [1988] Williamson Hoke [1988], and Wiedemann [1985]. We also describe a new, compared to our recent [STACS'2003], subexponential randomized algorithm for CU-functions, CLG-functions, parity, and simple stochastic games, and establish a relation with the class of LP-type problems introduced and investigated by Sharir \& Welzl [1992] and Matousek, Sharir \& Welzl [1992]. } } @TechReport{ it:2003-001, author = {Parosh Abdulla and Johann Deneux and Pritha Mahata and Aletta Nyl{\'e}n}, title = {Downward Closed Language Generators}, institution = it, department = docs, year = 2003, number = {2003-001}, month = jan, abstract = {We use downward closed languages for representing sets of states when performing forward reachability analysis on infinite-state systems. Downward closed languages are often more succinct than exact representations of the set of reachable states. We introduce a formalism for representing downward closed languages, called \emph{downward closed language generators (dlgs)}. We show that standard set operations needed for performing symbolic reachability analysis are computable for dlgs. 
Using a class of hierarchically defined dlgs, we have implemented a prototype for analysing timed Petri nets and used it to analyze a parameterized version of Fischer's protocol. We also show how dlgs can be used for uniform representation of formalisms previously presented for models such as Petri nets and lossy channel systems. } } @TechReport{ it:2002-041, author = {Per L{\"o}tstedt and Martin Nilsson}, title = {A Minimum Residual Interpolation Method for Linear Equations with Multiple Right Hand Sides}, institution = it, department = tdb, year = 2002, number = {2002-041}, month = dec, annote = {AMS subject classification (MSC2000): 65F10, 65R20, 78M05}, abstract = {An efficient method for solution of systems of linear equations with many right hand sides is developed. The right hand sides are assumed to depend smoothly on a parameter. The equations are solved by an iterative method and a linear least squares approximation is used as initial guess. The work spent on the iterations is bounded independently of the number of right hand sides. The method is applied to the solution of Maxwell's equations of electromagnetics in the frequency domain. The efficiency of the method is illustrated by computing the monostatic radar cross section around an aircraft model. } } @TechReport{ it:2002-040, author = {Torsten S{\"o}derstr{\"o}m}, title = {Why are errors-in-variables problems often tricky?}, institution = it, department = syscon, year = 2002, number = {2002-040}, month = nov, abstract = {There are several identification methods designed for the errors-in-variables problem. In this paper we focus on discussing the underlying assumptions for several of these methods. Some assumptions are shown to have far reaching consequences. For example, if the noise-free input happens to be periodic, simple estimators that give consistent parameter estimates of the system parameters can easily be designed. 
If the variances of the input and output noises are unknown, some structural assumption must be added for the system dynamics to be identifiable. On the other hand, should the ratio between output noise variance and input noise variance be known, it is possible not only to estimate the system parameters consistently, but also to combine this with a reasonable estimate of the unperturbed input. } } @TechReport{ it:2002-039, author = {R. Blaheta and S. Margenov and M. Neytcheva}, title = {Uniform estimate of the constant in the strengthened {CBS} inequality for anisotropic non-conforming {FEM} systems}, institution = it, department = tdb, year = 2002, number = {2002-039}, month = nov, abstract = {Preconditioners based on various multilevel extensions of two-level finite element methods (FEM) lead to iterative methods which have an optimal order computational complexity with respect to the size of the system. Such methods were first presented in \cite{AV1,AV2}, and are based on (recursive) two-level splittings of the finite element space. The key role in the derivation of optimal convergence rate estimates plays the constant $\gamma$ in the so-called Cauchy-Bunyakowski-Schwarz (CBS) inequality, associated with the angle between the two subspaces of the splitting. It turns out that only existence of uniform estimates for this constant is not enough and accurate quantitative bounds for $\gamma$ have to be found as well. More precisely, the value of the upper bound for $\gamma\in (0,1)$ is a part of the construction of various multilevel extensions of the related two-level methods. In this paper an algebraic two-level preconditioning algorithm for second order elliptic boundary value problems is constructed, where the discretization is done using Crouzeix-Raviart non-conforming linear finite elements on triangles. An important point to make is that in this case the finite element spaces corresponding to two successive levels of mesh refinements are not nested. 
To handle this, a proper two-level basis is considered, which enables us to fit the general framework for the construction of two-level preconditioners for conforming finite elements and to generalize the method to the multilevel case. The major contribution of this paper is the derived estimates of the related constant $\gamma$ in the strengthened CBS inequality. These estimates are uniform with respect to both coefficient and mesh anisotropy. Up to our knowledge, the results presented in the paper are the first for non-conforming FEM systems. } } @TechReport{ it:2002-038, author = {Parosh Aziz Abdulla and Alexander Rabinovich}, title = {Verification of Probabilistic Systems with Faulty Communication}, institution = it, department = docs, year = 2002, number = {2002-038}, month = nov, abstract = {Many protocols are designed to operate correctly even in the case where the underlying communication medium is faulty. To capture the behaviour of such protocols, \textit{lossy channel systems (LCS)} \cite{AbJo:lossy:IC} have been proposed. In an LCS the communication channels are modelled as FIFO buffers which are unbounded, but also unreliable in the sense that they can nondeterministically lose messages. Recently, several attempts \cite{BaEn:plcs,ABIJ:problossy} have been made to study \textit{probabilistic Lossy Channel Systems (PLCS)} in which the probability of losing messages is taken into account. In this paper, we consider a variant of PLCS which is more realistic than those studied in \cite{BaEn:plcs,ABIJ:problossy}. More precisely, we assume that during each step in the execution of the system, each message may be lost with a certain predefined probability. We show that for such systems the following model checking problem is decidable: to verify whether a given property definable by finite state $\omega$-automata holds with probability one. 
We also consider other types of faulty behavior, such as corruption and duplication of messages, and insertion of new messages, and show that the decidability results extend to these models. } } @TechReport{ it:2002-037, author = {Bob Melander and Mats Bj{\"o}rkman}, title = {Trace-Driven Network Path Emulation}, institution = it, department = docs, year = 2002, number = {2002-037}, month = nov, abstract = {This paper reports on on-going work where a trace-driven approach to network path emulation is investigated. Time stamped probe packets are sent along a network path whereby a probe packet trace can be generated. It basically contains the send times and the one-way delays/loss indications of the probe packets. Inside the emulator, the probe packet trace is used by a loss model and a delay model. These determine if a packet should be dropped or what the delay of the packet should be. Three loss models and three delay models are evaluated. For non-responsive UDP-based flows, the trace-driven loss and delay models that determine loss and delay based on loss-rates and delay distribution parameters calculated across the probe packet trace using a small gliding window are found to perform best. For adaptive TCP flows, none of the evaluated trace-driven models performs well. Instead, the Bernoulli loss model and an independent average delay model performs best.} } @TechReport{ it:2002-036, author = {Bernhard M{\"u}ller}, title = {Control Errors in {CFD!}}, institution = it, department = tdb, year = 2002, number = {2002-036}, month = oct, note = {Invited lecture to appear in the Proceedings of the 20th Aerospace Numerical Simulation Symposium at the National Aerospace Laboratory of Japan, held in Tokyo on July 3-5, 2002}, abstract = {Error control in computational fluid dynamics (CFD) has been crucial for reliability and efficiency of numerical flow simulations. The roles of truncation and rounding errors in difference approximations are discussed. 
Truncation error control is reviewed for ODEs. For difference approximations of PDEs, discretization error control by Richardson extrapolation is outlined. Applications to anisotropic grid adaptation in CFD are shown. Alternative approaches of error control in CFD are mentioned.} } @TechReport{ it:2002-035, author = {Ken Mattsson and Magnus Sv{\"a}rd and Mark Carpenter and Jan Nordstr{\"o}m}, title = {Accuracy Requirements for Steady and Transient Aerodynamics}, institution = it, department = tdb, year = 2002, number = {2002-035}, month = oct, abstract = {A numerical study on the benefit of high order methods is performed. Numerical computations of solutions governed by the Euler equations are performed. The propagation of a vortex convected through an empty domain, computations of steady state solutions around a NACA0012 airfoil and vortex-airfoil interaction are considered. These computations show that high order methods are often required in order to capture the significant flow features, especially for transient problems. } } @TechReport{ it:2002-034, author = {Stefan Johansson}, title = {Numerical Solution of the Linearized Euler Equations Using High Order Finite Difference Operators with the Summation by Parts Property}, institution = it, department = tdb, year = 2002, number = {2002-034}, month = oct, note = {Updated version, March 2003, available at \url{http://www.it.uu.se/research/publications/reports/2002-034/2002-034-updated} (PDF and Postscript)}, abstract = {We have used high order finite difference methods with the summation by parts property (SBP) on the 1D linearized Euler equations. The boundary conditions are imposed with both the projection method and the simultaneous approximation term method (SAT) for comparison. The formal fourth order of accuracy of the high order SBP operator was verified with both the projection method and the SAT method. 
Some relatively large errors were observed at the artificial boundaries and further investigations are needed to improve the non-reflecting boundary conditions. } } @TechReport{ it:2002-033, author = {Henrik Bj{\"o}rklund and Sven Sandberg and Sergei Vorobyov}, title = {Memoryless Determinacy of Parity and Mean Payoff Games: A Simple Proof}, institution = it, department = csd, year = 2002, number = {2002-033}, month = oct, note = {Updated journal version, March 2003, available at \url{http://www.it.uu.se/research/publications/reports/2002-033/2002-033-journal}} , abstract = {We give a simple, direct, and constructive proof of memoryless determinacy for Parity and Mean Payoff Games. First, we prove by induction that the finite-duration versions of these games, played until some vertex is repeated, are determined and both players have memoryless winning strategies. In contrast to [Ehrenfeucht-Mycielski79], our proof does not refer to the infinite-duration versions. Second, we show that the results straightforwardly generalize to infinite duration games.} } @TechReport{ it:2002-032, author = {Carl {\AA}borg and Anders Billing}, title = {Health Problems with Electronic Document Handling: a longitudinal study}, institution = it, department = hci, year = 2002, number = {2002-032}, month = oct, abstract = {This study shows that electronic document handling (EDH) systems can result in increased risks of work-related musculoskeletal disorders, especially ``mouse-arm syndrome'', and stress-related mental and somatic symptoms. The effects of introducing an EDH system on the physical and psychosocial work environment, as well as on self-reported health and well being, were studied at four Swedish work places where clerical duties are performed. Data were collected on three separate occasions: before and 6 and 18 months after the introduction of the EDH system. 
The methods used were interviews, observation interviews, questionnaires, video recordings, technical measurements and expert observation and examination. The results showed an increase in time spent at visual display units (VDUs) and longer periods without taking a break at the VDUs after introduction of the EDH system. Moreover, after the introduction of the EDH system, all of the data collection methods indicated (a) an increase in workload, (b) a greater number of repetitive and monotonous tasks, (c) participants felt more constrained, (d) a higher frequency in static work postures and (e) a diminution in task variability. The questionnaires revealed an increase in musculoskeletal symptoms and in psychological and psychosomatic complaints. The introduction of EDH systems can improve the effectiveness of work over the short run, but in order not to risk the health of the users an ergonomic strategy for the design of work organization, work systems, computer systems, job tasks and workstations is necessary. Keywords: Information technology, electronic document handling, physical and psychosocial work environment, health and well being, musculoskeletal disorders. } } @TechReport{ it:2002-031, author = {Carl {\AA}borg and Elisabeth Fernstr{\"o}m and Mats Ericson}, title = {Telework Work Environment and Well Being: A Longitudinal Study}, institution = it, department = hci, year = 2002, number = {2002-031}, month = oct, abstract = { The aim of this study is to analyse the effects that part-time telecommuting from home will have on office workers' physical and psychosocial work environment, at home and at the ordinary workplace, as well as on their general well being. Twenty-eight employees from two organizations were followed for a 2-year period. Diaries, interviews, and expert evaluation were used to collect data. 
Participants experienced part-time (1-2 days per week) telework from home as being helpful in obtaining a balance between work and leisure time, and in being more effective in their work. The participants worked more hours at home, worked long hours without breaks, and worked late at night and on the weekends at home. The workstation was seldom as good ergonomically at home as at the office. All the respondents experienced problems with the computer equipment and computer system. Conclusions: Introducing telework can enhance work effectiveness; however, it can also result in both an increase in workload and various work-related health risks. } } @TechReport{ it:2002-030, author = {Henrik Bj{\"o}rklund and Sven Sandberg and Sergei Vorobyov}, title = {An Experimental Study of Algorithms for Completely Unimodal Optimization}, institution = it, department = csd, year = 2002, number = {2002-030}, month = oct, abstract = {The paper presents an account of the experimental study of five different algorithms for the Completely Unimodal Pseudoboolean Function (CUPBF) Optimization. CUPBFs satisfy the Hirsch's conjecture, but are not known (although conjectured) to be polynomial. We summarize known and new upper and lower bounds, describe methods of random CUPBFs generation, and use them to compare the algorithms. } } @TechReport{ it:2002-029, author = {Bengt Eliasson}, title = {Domain Decomposition of the {P}ad{\'e} Scheme and Pseudo-Spectral Method, Used in {V}lasov Simulations}, institution = it, department = tdb, year = 2002, number = {2002-029}, month = oct, abstract = {In order to evaluate parallel algorithms for solving the Vlasov equation numerically in multiple dimensions, the algorithm for solving the one-dimensional Vlasov equation numerically has been parallelised. The one-dimensional Vlasov equation leads to a problem in the two-dimensional phase space $(x,v)$, plus time. The parallelisation is performed by domain decomposition to a rectangular processor grid. 
Derivatives in $x$ space are calculated by a pseudo-spectral method, where FFTs are used to perform discrete Fourier transforms. In velocity $v$ space a Fourier method is used, together with the compact Pad{\'e} scheme for calculating derivatives, leading to a large number of tri-diagonal linear systems to be solved. The parallelisation of the tri-diagonal systems in the Fourier transformed velocity space can be performed efficiently by the method of domain decomposition. The domain decomposition gives rise to Schur complement systems, which are tri-diagonal, symmetric and strongly diagonally dominant, making it possible to solve these systems with a few Jacobi iterations. Therefore, the parallel efficiency of the semi-implicit Pad{\'e} scheme is comparable to the parallel efficiency of explicit difference schemes. The parallelisation in $x$ space is less effective due to the FFTs used. The code has been tested on shared memory computers, on clusters of computers, and with the help of the Globus toolkit for communication over the Internet. } } @TechReport{ it:2002-028, author = {Bengt Eliasson}, title = {Numerical Modelling of the Two-Dimensional {V}lasov-{M}axwell System}, institution = it, department = tdb, year = 2002, number = {2002-028}, month = oct, abstract = {The two-dimensional Vlasov-Maxwell system for a plasma with mobile, magnetised electrons and ions, is investigated numerically. Special attention is paid to the conservation of the divergences of the electric and magnetic fields in the Maxwell equations. The Maxwell equations are rewritten, by means of the Lorentz potentials, in a form which conserves these divergences. Linear phenomena are investigated numerically and compared with theory and with previous numerical results. 
} } @TechReport{ it:2002-027, author = {Markus Nord{\'e}n and Sverker Holmgren and Michael Thun{\'e}}, title = {{OpenMP} versus {MPI} for {PDE} Solvers Based on Regular Sparse Numerical Operators}, institution = it, department = tdb, year = 2002, number = {2002-027}, month = oct, abstract = {Two parallel programming models represented by OpenMP and MPI are compared for PDE solvers based on regular sparse numerical operators. As a typical representative of such an operator, a finite difference approximation of the Euler equations for fluid flow is considered. The comparison of programming models is made with regard to uniform memory access (UMA), non-uniform memory access (NUMA), and self optimizing NUMA (NUMA-opt) computer architectures. By NUMA-opt, we mean NUMA systems extended with self optimization algorithms, in order to reduce the non-uniformity of the memory access time. The main conclusions of the study are: (1) that OpenMP is a viable alternative to MPI on UMA and NUMA-opt architectures, (2) that OpenMP is not competitive on NUMA platforms, unless special care is taken to get an initial data placement that matches the algorithm, and (3) that for OpenMP to be competitive in the NUMA-opt case, it is not necessary to extend the OpenMP model with additional data distribution directives, nor to include user-level access to the page migration library. } } @TechReport{ it:2002-026, author = {Henrik Bj{\"o}rklund and Sven Sandberg and Sergei Vorobyov}, title = {A Discrete Subexponential Algorithm for Parity Games}, institution = it, department = csd, year = 2002, number = {2002-026}, month = sep, abstract = { We suggest a new randomized algorithm for solving Parity Games with the worst case time complexity roughly \[\min\left(O\left( n^3 \cdot \left( \frac{n}{k}+1 \right)^k \right), \;2^{O(\sqrt{n\log n})}\right),\] where $n$ is the number of vertices and $k$ the number of colors of the game. 
Compared with the previously known algorithms, which are efficient when the number of colors is small, it is subexponential when the number of colors is large, $k = \Omega(n^{1/2 + \varepsilon})$. } } @TechReport{ it:2002-025, author = {Paul Pettersson and Wang Yi (eds.)}, title = {Workshop on Real-Time Tools (Proceedings)}, institution = it, department = docs, year = 2002, number = {2002-025}, month = jul, abstract = {Proceedings of the 2nd workshop on Real-Time Tools (RT-TOOLS), arranged in Copenhagen, Denmark, August 1 2002 in association with the 14th conference on Computer-Aided Verification (CAV).} } @TechReport{ it:2002-024, author = {Jakob Engblom and Magnus Nilsson}, title = {Time Accurate Simulation: Making a {PC} Behave Like a 8-Bit Embedded {CPU}}, institution = it, department = docs, year = 2002, number = {2002-024}, month = jul, abstract = {When developing embedded systems, developers often use simulation techniques to allow development to proceed without access to the target hardware. To make use of the high quality development tools available on the PC platform, one popular simulation method is to compile the code intended for the target system to run on the PC, allowing the development of the software to proceed on the PC without use of the final target system. For a distributed system, each target node is given its own process on the host PC, with software on the PC simulating the communications network. We have extended one such simulation environment to include the aspect of relative and absolute processing speed of the target systems, allowing for a more accurate simulation where not only functional but also timing-related bugs can be found and diagnosed. The absolute time mode makes the software on the PC run at the same speed as the real target, thus allowing the mixing of simulated nodes with real target hardware in the same system setup. The method is applicable to any embedded processor, as long as it is significantly slower than the PC. 
The system has been implemented and tested on standard PCs running the Windows NT operating system, and is currently being used in industrial projects. } } @TechReport{ it:2002-023, author = {Lars Ferm and Per L{\"o}tstedt}, title = {Anisotropic grid adaptation for Navier-Stokes' equations}, institution = it, department = tdb, year = 2002, number = {2002-023}, month = jun, abstract = {Navier-Stokes' equations are discretized in space by a finite volume method. Error equations are derived which are approximately satisfied by the errors in the solution. The dependence of the solution errors on the discretization errors is analyzed in certain flow cases. The grid is adapted based on the estimated discretization errors. The refinement and coarsening of the grid are anisotropic in the sense that it is different in different directions in the computational domain. The adaptation algorithm is applied to laminar, viscous flow over a flat plate, in a channel with a bump, and around a cylinder and an airfoil. } } @TechReport{ it:2002-022, author = {Henrik Lundgren and Erik Nordstr{\"o}m and Christian Tschudin}, title = {Coping with Communication Gray Zones in {IEEE} 802.11b based Ad hoc Networks}, institution = it, department = docs, year = 2002, number = {2002-022}, month = jun, abstract = {Our experiments with IEEE 802.11b based wireless ad hoc networks show that neighbor sensing with broadcast messages introduces ``communication gray zones'': in such zones data messages cannot be exchanged although the HELLO messages indicate neighbor reachability. This leads to a systematic mismatch between the route state and the real world connectivity, resulting in disruptive behavior for some ad hoc routing protocols. Concentrating on AODV we explore this issue and evaluate three different techniques to overcome the gray zone problem. We present quantitative measurements of these improvements and discuss the consequences for ad hoc routing protocols and their implementations. 
} } @TechReport{ it:2002-021, author = {Johan Runeson and Sven-Olof Nystr{\"o}m}, title = {Generalizing Chaitin's Algorithm: Graph-Coloring Register Allocation for Irregular Architectures}, institution = it, department = csd, year = 2002, number = {2002-021}, month = may, abstract = {We consider the problem of generalizing Chaitin-style graph-coloring register allocation to the irregular architectures used in embedded systems. A class of formal machine descriptions is introduced, capable of describing a wide range of irregular architectures. We extend Chaitin's register allocation algorithm to handle any architectural constraints that can be expressed using these machine descriptions. The generalized algorithm is applicable to a wider range of architectures and systems than any other adaptation of Chaitin's algorithm found in the literature. We argue that the modifications to the original algorithm can be combined with most important extensions to Chaitin's framework, for example coalescing and optimistic coloring. } } @TechReport{ it:2002-020, author = {Tobias Amnell and Elena Fersman and Paul Pettersson and Hongyan Sun and Wang Yi}, title = {Code Synthesis for Timed Automata}, institution = it, department = docs, year = 2002, number = {2002-020}, month = may, abstract = {We present a framework for development of real-time embedded systems based on the recently presented model of timed automata extended with real-time tasks. It has been shown previously that design problems such as reachability and schedulability are decidable for the model of timed automata with tasks. In this paper we describe how to automatically synthesise executable code with predictable timing behaviour, which is guaranteed to meet constraints (timing and other) imposed on the design model. 
To demonstrate the applicability of the framework, implemented in the Times tool, we present a case-study of a well known production cell, built in LEGO and controlled by a Hitachi H8 based LEGO Mindstorm control brick. } } @TechReport{ it:2002-019, author = {Erik B{\"a}ngtsson and Daniel Noreland and Martin Berggren}, title = {Shape optimization of an acoustic horn}, institution = it, department = tdb, year = 2002, number = {2002-019}, month = may, abstract = {Shape optimization of an acoustic horn is performed with the goal to minimize the portion of the wave that is reflected. The analysis of the acoustical properties of the horn is performed using a finite element method for the Helmholtz equation. The optimization is performed employing a BFGS Quasi-Newton algorithm, where the gradients are provided by solving the associated adjoint equations. To avoid local solutions to the optimization problem corresponding to irregular shapes of the horn, a filtering technique is used that applies smoothing to the design updates and the gradient. This smoothing technique can be combined with Tikhonov regularization. However, experiments indicate that regularization is redundant for the optimization problems we consider here. However, the use of smoothing is crucial to obtain sensible solutions. The smoothing technique we use is equivalent to choosing a representation of the gradient of the objective function in an inner product involving second derivatives along the design boundary. Optimization is performed for a number of single frequencies as well as for a band of frequencies. For single frequency optimization, the method shows particularly fast convergence with indications of super-linear convergence close to optimum. 
For optimization on a range of frequencies, a design was achieved providing a low and even reflection throughout the entire frequency band of interest.} } @TechReport{ it:2002-018, author = {Henrik Bj{\"o}rklund and Sven Sandberg and Sergei Vorobyov}, title = {Optimization on Completely Unimodal Hypercubes}, institution = it, department = csd, year = 2002, number = {2002-018}, month = may, abstract = {We investigate and compare, both theoretically and practically, several old and new algorithms for the completely unimodal pseudo-boolean function optimization problem. We settle the first nontrivial upper bounds for two well-known local search algorithms, the Random and the Greedy Single Switch algorithms. We also present the new Random Multiple and All Profitable Switches algorithms. These are not local search-type algorithms, and have not been previously considered for this problem. We justify the algorithms and show nontrivial upper bounds. In particular we prove a $O(2^{0.775n})$ bound for the Random Multiple Switches Algorithm, and also use random sampling to improve the bounds for all above algorithms. In particular we give a $O(2^{0.46n}) = O(1.376^n)$ upper bound for the Random Multiple Switches Algorithm with Random Sampling. We also show how Kalai-Ludwig's algorithm for Simple Stochastic Games \cite{Ludwig95} can be modified to solve the problem at hand, and that the modified algorithm preserves the subexponential, $2^{O(\sqrt{n})}$, running time. We introduce and justify a new method for random generation of `presumably hard' completely unimodal pseudo-boolean functions. We also present experimental results indicating that all the above algorithms perform well in practice. The experiments, surprisingly, show that in all generated example cases, the subexponential Kalai-Ludwig's algorithm is outperformed by all the other algorithms. 
} } @TechReport{ it:2002-017, author = {Eva Olsson and Stefan Seipel and Anders Jansson and Bengt Sandblad}, title = {The Windscreen Used as a Display for Navigation Information. An introductory study}, institution = it, department = hci, year = 2002, number = {2002-017}, month = may, abstract = {Grounding and collision are, next to fire, threats to a safe journey. The navigator needs to know exactly where the ship is and that the ship is on the expected route, which means that there is no risk for grounding, and finally that interference with surrounding traffic is avoided. Navigators may suffer from loss of important information, for instance in situations where visibility is reduced and in darkness. In addition to watching the surroundings, the navigator needs to monitor the radar screen and, more frequently, an electronic chart display (ECDIS). These displays provide information but the monitoring and managing of the displays may reduce the navigator's attention on the surrounding world. A recent accident report (Investigation report C4/1998) pointed out that adjustment of sea clutter required the navigator to go through five separate steps on the radar equipment. Accidents have also happened where the navigator in the course of events have been preoccupied with e.g. adjustment of radar screen clutter (MS Sleipner, 2000). The solution suggested here integrates information from a number of sources and presents safety-critical navigational information on the windscreen in front of the navigator. Such information can consist of markings for navigable channel, contours and waypoints of a pre-defined route, contours of shorelines and rocks, wrecks etc., and possibly heading and speed of surrounding vessels as well as information regarding the potential threat from an oncoming vessel. It is important that the projected objects are subtle and do not interfere with the navigator's focus on the real world. 
The different pieces of information presented in the navigator's visual field will be picked up from DGPS/GPS, transponders, radar, ARPA and electronic chart systems (ECDIS). In the experiment reported here a Plexiglas display covered with holographic film was used as the windscreen that safety critical information was projected on. Tracking equipment was used to let the navigator move freely and continuously have the enhanced information in the correct position, as an overlay on the real world. } } @TechReport{ it:2002-016, author = {Fredrik Edelvik}, title = {A New Technique for Accurate and Stable Modeling of Arbitrarily Oriented Thin Wires in the FDTD Method}, institution = it, department = tdb, year = 2002, number = {2002-016}, month = apr, abstract = {A subcell model for thin wires in the finite-difference time-domain (FDTD) method using modified telegraphers equations has been developed by Holland et al. In this paper we present an extension of their algorithm, which allows arbitrarily located and oriented wires with respect to the Cartesian grid. This is important to be able to accurately model wires that cannot be aligned to the Cartesian grid, e.g.\ tilted wires and circular loop wires. A symmetric coupling between field and wires yields a stable time-continuous field-wire system and the fully discrete field-wire system is stable under a CFL condition. The accuracy and excellent consistency of the proposed method are demonstrated for dipole and loop antennas with comparisons with the Method of Moments and experimental data. } } @TechReport{ it:2002-015, author = {Andreas Johnsson}, title = {Differentiated Security in Wireless Networks}, institution = it, department = docs, year = 2002, number = {2002-015}, month = apr, note = {M.Sc. thesis}, abstract = {This report presents a three step solution to the differentiated security and self-configuration problem in wireless network where users are able to come and go as they wish. 
Differentiated security is about giving different users different access rights towards a surrounding infrastructure. Access rights can in some systems be bought and in others they may be related to a user's location or real life status. Self-configuration is about minimizing the configuration effort needed by network users. First a generic architecture is presented, which provides an abstract solution without any requirements on specific techniques or tools. Thereafter a prototype specification and a prototype implementation are derived from the architecture. They show that the desired system is feasible and relatively easy to construct, using well known tools for resource discovery, security and address handling. } } @TechReport{ it:2002-014, author = {Samuel Sundberg and Lina von Sydow}, title = {Analysis of a semi-Toeplitz preconditioner for a convection-diffusion problem}, institution = it, department = tdb, year = 2002, number = {2002-014}, month = apr, abstract = {We have defined and analyzed a semi-Toeplitz preconditioner for time-dependent and steady-state convection-diffusion problems. The preconditioner exhibits very good theoretical convergence properties. The analysis is corroborated by numerical experiments. } } @TechReport{ it:2002-013, author = {Jakob Engblom}, title = {Effects of Branch Predictors on Execution Time}, institution = it, department = docs, year = 2002, number = {2002-013}, month = apr, abstract = {This technical report describes the result of a short experimental investigation into the effect of branch predictors on the execution times of tight loops on modern aggressive microprocessors. The same code was tried on Pentium III, Athlon, UltraSparc II, and UltraSparc III processors. For reference, the same experiment was carried out on a simple V850E processor to determine the behaviour without branch prediction. 
The results indicate that advanced branch predictors give a very high and hard-to-understand variation in the execution time of loops, and that this effect can be very big relative to the execution time of other instructions. } } @TechReport{ it:2002-012, author = {Lars Ferm and Per L{\"o}tstedt}, title = {Accurate and Stable Grid Interfaces for Finite Volume Methods}, institution = it, department = tdb, year = 2002, number = {2002-012}, month = mar, abstract = {A convection-diffusion equation is discretized by a finite volume method in two space dimensions. The grid is partitioned into blocks with jumps in the grid size at the block interfaces. Interpolation in the cells adjacent to the interfaces is necessary to be able to apply the difference stencils. Second order accuracy is achieved and the stability of the discretizations is investigated. The interface treatment is tested in the solution of the compressible Navier-Stokes equations. The conclusions from the scalar equation are valid also for these equations. } } @TechReport{ it:2002-011, author = {Sheayun Lee and Andreas Ermedahl and Sang Lyul Min and Naehyuck Chang}, title = {Statistical Derivation of an Accurate Energy Consumption Model for Embedded Processors}, institution = it, department = docs, year = 2002, number = {2002-011}, month = mar, abstract = {The energy consumption of software is becoming an increasingly important issue in designing mobile embedded systems where batteries are used as the main power source. As a consequence, recently, a number of promising techniques have been proposed to optimize software for reduced energy consumption. Such low-power software techniques require an energy consumption model that can be used to identify the factors contributing to the overall energy consumption. We propose a technique to derive an accurate energy consumption model by abstracting the energy behavior of the target processor. 
The proposed approach combines empirical measurement with a statistical analysis technique to approximate the actual energy consumption, whose result is a model equation that can be used to estimate software energy consumption. The model equation also provides insightful information that can be used in program optimization for low energy, by identifying the factors affecting the energy consumption of software. Experimental results show that the model equation can accurately estimate the energy consumption of a random instruction sequence, with an average error of 2.5\%. } } @TechReport{ it:2002-010, author = {Jonas Boustedt}, title = {Automated Analysis of Dynamic Web Services}, institution = it, year = 2002, number = {2002-010}, month = mar, note = {M.Sc. thesis}, abstract = {For a web application test-engineer, it would be convenient to have a map, in the form of a graph, describing the functional topology of the application. In that way, it would be possible to analyse the possible paths which can be navigated to discover redundancies and circularities for example. A web spider tool can automate the construction of such a graph. The spider can request a document from the application, find all references to other documents in it, and explore them recursively until all the references have been analysed. However, web services often produce dynamic responses which means that the content cannot be distinctly represented by its reference, i.e., the responses must be classified in a way that matches the users' perception. The main problem is to find suitable criteria for this classification. This study describes how to make such a tool and it surveys ideas for how to create a classifying identifier for dynamic responses. The implemented spider was used to make experiments on selected web services, using different models for web node identification. The result is a proposal of suitable criteria for classification of dynamic responses, coming from web applications. 
These criteria are implemented in algorithms which use the parse structure and the set of internal references as the dominant terms of identification. } } @TechReport{ it:2002-009, author = {Andy Bavier and Thiemo Voigt and Mike Wawrzoniak and Larry Peterson and Per Gunningberg}, title = {{SILK}: Scout Paths in the Linux Kernel}, institution = it, department = docs, year = 2002, number = {2002-009}, month = feb, abstract = {SILK stands for Scout In the Linux Kernel, and is a port of the Scout operating system to run as a Linux kernel module. SILK forms a replacement networking subsystem for standard Linux 2.4 kernels. Linux applications create and use Scout paths via the Linux socket interface with virtually no modifications to the applications themselves. SILK provides Linux applications with the benefits of Scout paths, including early packet demultiplexing, per-flow accounting of resources, and explicit scheduling of network processing. SILK also introduces the concept of an \emph{extended path} to provide a framework for application QoS. We demonstrate the utility of SILK by showing how it can provide QoS for the Apache Web server. } } @TechReport{ it:2002-008, author = {Henrik Bj{\"o}rklund and Sergei Vorobyov}, title = {Two Adversary Lower Bounds for Parity Games}, institution = it, department = csd, year = 2002, number = {2002-008}, month = feb, abstract = {By using the adversary arguments we settle the first exponential lower bounds for restricted classes of algorithms solving Parity Games. The first result applies to any algorithms that rely only on estimating values of vertices from the viewpoint of one player and ignore the game graph structure (a rough abstraction of different fix-point algorithms); the second settles the lower bound for a randomized algorithm that samples from the set of optimal counterstrategies (a popular idea used in many approaches). } } @TechReport{ it:2002-007, author = {F. Edelvik and G. Ledfelt and P. L{\"o}tstedt and D. J. 
Riley}, title = {An Unconditionally Stable Subcell Model for Arbitrarily Oriented Thin Wires in the FETD Method}, institution = it, department = tdb, year = 2002, number = {2002-007}, month = feb, abstract = {A computational subcell model for thin wires is developed for electromagnetic simulations. The Maxwell equations are discretized by a finite element approximation on a tetrahedral grid. The wires are described by a second-order equation for the current. The geometry of the wires can be chosen independent of the volume grid. A symmetric coupling between field and wires yields a stable semi-discrete field-wire system and an unconditionally stable fully discrete field-wire system. The system of equations is in each time step solved by a preconditioned conjugate gradient method. The accuracy of the subcell model is demonstrated for dipole and loop antenna with comparisons with the Method of Moments and experimental data. } } @TechReport{ it:2002-006, author = {Anders Berglund}, title = {How do students understand network protocols? A phenomenographic study}, institution = it, department = docs, year = 2002, number = {2002-006}, month = feb, abstract = {University students' understanding of network protocols is in focus in this report. With an overall aim to improve learning and teaching in computer systems at a university level, an empirically based study has been performed. In the study, the different ways in which students understand three specific network protocols -- TCP, UDP and RMI -- as well as the general concept of a network protocol have been investigated with a phenomenographic research approach. Qualitatively different ways of understanding or experiencing network protocols are discerned. The identified critical differences between the understandings are ``how'' or ``as what'' the protocols are understood, ``as a part of which framework'' the protocols exist, and ``in what way'' the protocols are described. 
Although experienced as different, the three protocols are understood as being parts of similar frameworks. Recommendations for teaching of computer systems in distributed projects are made, based on the results. Universities should teach computer networks in a way that encourages students to understand network protocols in these critically different ways, and that stimulates them to shift between these ways depending on the task at hand.} } @TechReport{ it:2002-005, author = {Kajsa Ljungberg and Sverker Holmgren and {\"O}rjan Carlborg}, title = {Efficient Kernel Algorithms for QTL Mapping Problems}, institution = it, department = tdb, year = 2002, number = {2002-005}, month = feb, abstract = {The advent of sophisticated and powerful methods for molecular genetics pushes the need for efficient methods for data analysis. Advanced algorithms are necessary for extracting all possible information from laboriously obtained data sets. We present a general linear algebra framework for QTL mapping, applicable to many commonly used methods, using both linear regression and maximum likelihood estimation. The formulation simplifies future comparisons between and analyses of the methods. We show how the common structure of QTL analysis models can be used to improve the kernel algorithms, drastically reducing the computational effort while retaining the original analysis results. We have evaluated our new algorithms on data sets originating from two large F$_2$ populations of domestic animals. Using an updating approach, we show that 1-3 orders of magnitude reduction in computational demand can be achieved for matrix factorizations. For interval mapping/composite interval mapping settings using a maximum likelihood model, we also show how to use the original EM algorithm instead of the ECM approximation, significantly improving the convergence and introducing an additional reduction in the computational time. 
The algorithmic improvements make it feasible to perform analyses previously deemed impractical or even impossible. For example, using the new algorithms it is reasonable to perform permutation testing using exhaustive search on populations of 200 individuals for fully epistatic two-QTL models with a large number of parameters. } } @TechReport{ it:2002-004, author = {Mats L{\"o}fberg and Jan Gulliksen and Torsten Norlander}, title = {Anv{\"a}ndare i systemutvecklingsprocessen - ett fenomenologiskt perspektiv}, institution = it, department = hci, year = 2002, number = {2002-004}, month = feb, note = {In Swedish}, abstract = {This study has been accomplished through the Department of Psychology, Institution of Social Science, University of Karlstad. The study is further presented as a part of the VERKA-project, Department of Human-Computer Interaction (HCI), Institution of Information Technology, Uppsala University. The aim of the study is to examine how user participation in systems development can successfully be accomplished. Another purpose is the development of guidelines in user centred systems design. The result is analysed and described from a psychological perspective. A qualitative method was deemed to be the appropriate way to obtain data in the study. The empirical information was collected through interviews. To analyse the material Karlsson's Empirical Phenomenological Psychological Method (the EPP-method) was used. The respondents were participants in RSV's and RFV's systems development projects under the VERKA-project at Uppsala University. A total of 11 respondents participated in the study. Generally the respondents had a positive attitude towards participating in systems development projects - even if the work from time to time was considered burdensome. The participants felt like equal members in the systems design process, and they felt that their opinions were as important as anyone else's. 
Another discovery was a role division with the members of the systems development project on one side and their former colleagues on the other. An important part in future development processes can be to inform user representatives about experiences from earlier participants. The development of cooperation between projects and former colleagues can have a positive impact on acceptance, learning and attitudes. } } @TechReport{ it:2002-003, author = {Ken Mattsson and Magnus Sv{\"a}rd and Jan Nordstr{\"o}m}, title = {Stable and accurate artificial dissipation}, institution = it, department = tdb, year = 2002, number = {2002-003}, month = jan, abstract = {Stability for nonlinear convection problems using centered difference schemes require the addition of artificial dissipation. In this paper we present dissipation operators that preserve both stability and accuracy for high order finite difference approximations of initial boundary value problems. } } @TechReport{ it:2002-002, author = {S. Seipel and M. Lindkvist}, title = {Methods and application of interactive 3D computer graphics in anthropology}, institution = it, department = hci, year = 2002, number = {2002-002}, month = jan, abstract = {This report presents 3D computer graphics techniques for interactive visual exploration of virtual mummies. It is part of an exposition on the Cultural Heritage of the Egyptian Mummies in the Museum Gustavianum, in Uppsala. We describe a general-purpose projection metaphor for correctly presenting virtual 3D images on the dissection table of a historical anatomic theatre. This method allows for dynamic off-axis perspective viewing situations as well as it provides keystone correction for excessive projection angles as necessitated by the specific installation environment. For the application to reach out beyond the scope of the exhibition, we developed an adaptive image-based rendering approach that scales with the performance of the rendering host. 
Based on dynamic mesh simplification of the 3D mummy model, it automatically performs re-projections of texture images in order to maintain correct visual results. For interaction purposes with a digitiser tablet we present a means of stroke-based input that provides ease of use to non-expert visitors of the exhibition. The problems we address with this paper are not only of interest for this particular application domain but generally for all interactive graphical installations, which must be adapted to the existing architectural situation.} } @TechReport{ it:2002-001, author = {S{\^o}nia M. Gomes and Bertil Gustafsson}, title = {Combining Wavelets with Finite Differences: Consistency Analysis}, institution = it, department = tdb, year = 2002, number = {2002-001}, month = jan, abstract = {The method is an adaptive finite difference strategy for numerical solution of evolution partial differential equations. The principle is to represent the solution only through those point values indicated by the significant wavelet coefficients. Typically, few points are found in each time step, the grid being coarse in smooth regions, and refined close to irregularities. At each point, the spatial derivatives are discretized by uniform finite differences, using step size proportional to the point local scale. Eventually, required neighboring stencils are not present in the grid. In such case, the corresponding point values are approximated from coarser scales by using reconstruction operators defined by means of interpolating subdivision scheme. Our purpose in this paper is to analyse a generalization of the concept of truncation error, which is the familiar basis of the analysis of difference schemes. For this consistency analysis, we show that the adaptive finite difference scheme can also be formulated in terms of a collocation scheme for an adapted wavelet expansion of the solution. 
For this purpose, we first prove some results concerning the local behavior of the reconstruction operators, which stand for appropriate cone-like grids. } } @TechReport{ it:2001-030, author = {Jakob Engblom and Andreas Ermedahl and Friedhelm Stappert}, title = {Validating a Worst-Case Execution Time Analysis Method for an Embedded Processor}, institution = it, department = docs, year = 2001, number = {2001-030}, month = dec, abstract = {Knowing the Worst-Case Execution Time (WCET) of a program is necessary when designing and verifying real-time systems. When evaluating WCET analysis methods, the common methodology is to compare a WCET estimate with an execution of the same program with known worst-case data on the target hardware. This evaluation method is inadequate, since errors in one part of the analysis might mask errors occurring in other parts of the analysis. In this paper we present a methodology for systematically testing WCET analysis tools for modern pipelined processors. The methodology is based on a decomposition of WCET analysis into a set of components that should be tested and validated in isolation. Our testing methodology does not require that we have a perfect model of the hardware, thus the validation of the hardware model is considered as a separate problem. We apply the methodology to our previously published WCET analysis method, and show that the pipeline analysis and the calculation method we use are safe and produce tight results. We also show that our WCET analysis method can handle programs containing nested loops, functions whose execution times depend on parameters, multiway branches (switch statements) and unstructured code. 
} } @TechReport{ it:2001-029, author = {Henrik Lundgren and David Lundberg and Johan Nielsen and Erik Nordstr{\"o}m and Christian Tschudin}, title = {A Large-scale Testbed for Reproducible Ad hoc Protocol Evaluations}, institution = it, department = docs, year = 2001, number = {2001-029}, month = nov, note = {A shorter version of this paper is accepted for publication in the proceedings of IEEE WCNC'02}, abstract = {We have built an Ad hoc Protocol Evaluation testbed (APE) in order to perform large-scale, reproducible experiments. APE aims at assessing several different routing protocols in a \emph{real world} environment instead of by simulation. We present the APE testbed architecture and report on initial experiments with up to 37 physical nodes that show the reproducibility and scalability of our approach. Several scenario scripts have been written that include strict choreographic instructions to the testers who walk around with ORiNOCO equipped laptops. We introduce a metric called \emph{Virtual Mobility} that we use to compare different testruns. This metric is based on the \emph{measured} signal quality instead of the geometric distance between nodes, hence it reflects how a routing protocol actually \emph{perceives} the network's dynamics.} } @TechReport{ it:2001-028, author = {Magnus Berggren}, title = {Wireless communication in telemedicine using Bluetooth and {IEEE} 802.11b}, institution = it, department = docs, year = 2001, number = {2001-028}, month = nov, note = {M.Sc. thesis}, abstract = {This thesis explores the issues surrounding the simultaneous deployment of Bluetooth \& Wireless LAN for networked devices in a telemedicine environment. A key aspect in telemedicine is the ability of the two standards to co-exist in close proximity. This work defines telemedicine, gives examples of applications, and describes the two wireless techniques and the issues that arise when considering the introduction of wireless communication. 
The thesis tries to answer the question whether the two wireless techniques Bluetooth and Wireless LAN (IEEE 802.11b) can co-exist in a telemedicine environment. In order to do this, quantified measurements were performed where the two wireless techniques were exposed to radio interference from the other technique. These results are printed as graphs and explained. } } @TechReport{ it:2001-027, author = {K. Mahata and T. S{\"o}derstr{\"o}m and M. Mossberg and L. Hillstr{\"o}m and S. Mousavi}, title = {On the use of flexural wave propagation experiments for identification of complex modulus}, institution = it, department = syscon, year = 2001, number = {2001-027}, month = nov, note = {Also in the proceedings of 3rd International Conference on Identification of Engineering System, Swansea 2002}, abstract = {In this paper, we investigate the estimation of the complex modulus of a viscoelastic material from flexural wave experiments. A bar specimen of uniform cross-section is subjected to lateral impact by a steel ball giving rise to flexural waves traveling along the bar. The strains due to wave propagation are registered as functions of time using strain gauges at different sections. The measured strains are transformed into the frequency domain. A non-parametric estimation of the complex modulus is carried out for each frequency. An analysis of the quality of the non-parametric estimate is carried out. The validity of the theoretical results is confirmed by numerical studies and experimental tests. 
} } @TechReport{ it:2001-026, author = {Jan Gulliksen and Inger Boivie}, title = {Usability Throughout the Entire Software Development Lifecycle - A Summary of the {INTERACT} 2001 Workshop}, institution = it, department = hci, year = 2001, number = {2001-026}, month = nov } @TechReport{ it:2001-025, author = {Emmanuel Beffara and Sergei Vorobyov}, title = {Is Randomized Gurvich-Karzanov-Khachiyan's Algorithm for Parity Games Polynomial?}, institution = it, department = csd, year = 2001, number = {2001-025}, month = nov, abstract = {We report on the experimental study of the Gurvich-Karzanov-Khachiyan (\GKK) algorithm for cyclic games adapted for \emph{parity games} (equivalent to the $\mu$-calculus model checking), one of the major open problems in complexity and automata theories, computer-aided verification. The algorithm demonstrates excellent polynomial (actually, a sublinear number of iterations) behavior in a substantial (millions) number of experiments with games of sizes up to 20.000 and 6 -- 7 colors. It also allows for a natural randomization. We conducted extensive experiments of the randomized \GKK{} algorithm on the `hard' Lebedev-Gurvich's game instances, which force the deterministic version of the algorithm to make \emph{exponentially} many iterations. With high probability the algorithm converges after just a few hundred iterations (compared with $2^{50}$ -- $2^{60}$ for deterministic version). This allows for giving up computations that converge slowly and restart with a fresh initial potential transformation. While it remains to be theoretically justified and proved, we present convincing experimental data on dependency of the average and maximal number of iterations on game sizes, outdegrees, colors, initial randomizations. 
} } @TechReport{ it:2001-024, author = {Larisa Beilina and Klas Samuelsson and Krister {\AA}hlander}, title = {A hybrid method for the wave equation}, institution = it, department = tdb, year = 2001, number = {2001-024}, month = oct, note = {Also available as Preprint 2001-14 in Chalmers Finite Element Center Preprint series}, abstract = {Hybrid finite element/finite difference simulation of the wave equation is studied. The simulation method is hybrid in the sense that different numerical methods, finite elements and finite differences, are used in different subdomains. The purpose is to combine the flexibility of finite elements with the efficiency of finite differences. The construction of proper geometry discretisations is important for the hybrid approach. A decomposition of the computational domain is described, which yields simple communication between structured and unstructured subdomains. An explicit hybrid method for the wave equation is constructed where the explicit finite difference schemes and finite element schemes coincide for structured subdomains. These schemes are used in the hybrid approach, keeping finite differences on the structured subdomains and applying finite elements on the unstructured domains. As a consequence of the discretisation strategy, the resulting hybrid scheme can be regarded as a pure finite element scheme. Any numerical difficulties such as instabilities at the interfaces are thus avoided. The feasibility of the hybrid approach is illustrated by numerous wave equation simulations in two and three space dimensions. In particular, the approach can easily be used for implementing absorbing boundary conditions. The efficiency of different approaches is a key issue of the current study. For our test cases, the hybrid approach is about 5 times faster than a corresponding highly optimised finite element method. 
It is concluded that the hybrid approach may be an important tool to reduce the execution time and memory requirement for this kind of large scale computations. } } @TechReport{ it:2001-023, author = {Pierre Flener and Alan Frisch and Brahim Hnich and Zeynep Kiziltan and Ian Miguel and Toby Walsh}, title = {Matrix Modelling}, institution = it, department = csd, year = 2001, number = {2001-023}, month = sep, abstract = {We argue that constraint programs with one or more matrices of decision variables provide numerous benefits, as they share many patterns for which general methods can be devised, such as for symmetry breaking. On a wide range of real-life application domains, we demonstrate the generality and utility of such matrix modelling. }, note = {Also released as Technical Report APES-36-2001 of the APES group, 2001, available at \url{http://www.dcs.st-and.ac.uk/~apes/reports/apes-36-2001.ps.gz}. Appears in the Proceedings of the CP-01 Workshop on Modelling and Problem Formulation. 7th International Conference on the Principles and Practice of Constraint Programming, 2001} } @TechReport{ it:2001-022, author = {Pierre Flener and Alan Frisch and Brahim Hnich and Zeynep Kiziltan and Ian Miguel and Justin Pearson and Toby Walsh}, title = {Symmetry in Matrix Models}, institution = it, year = 2001, number = {2001-022}, month = sep, abstract = {Many constraint satisfaction problems (such as scheduling, assignment, and configuration) can be modelled as constraint programs based on matrices of decision variables. In such matrix models, symmetry is an important feature. We study and generalise symmetry-breaking techniques, such as lexicographic ordering, and propose a labelling technique achieving the same effect.}, note = {Also released as Technical Report APES-30-2001 of the APES group, 2001, available at \url{http://www.dcs.st-and.ac.uk/~apes/reports/apes-30-2001.ps.gz}. Appears in the Proceedings of the CP-01 Workshop on Symmetry in Constraints. 
7th International Conference on the Principles and Practice of Constraint Programming, 2001 } } @TechReport{ it:2001-021, author = {Inger Boivie}, title = {Usability and Design Decisions in Software Development}, institution = it, department = hci, year = 2001, number = {2001-021}, month = sep, abstract = {Despite the attention that has been paid to usability in the last few years, the world is still full of inadequate software and frustrated users. The aim of this study was to deepen the understanding of how usability matters are handled in software development projects, particularly at the Swedish National Tax Board (Riksskatteverket - RSV) and the Swedish National Board for Social Securities (Riksf{\"o}rs{\"a}kringsverket - RFV). The main focus of the study was usability and interaction design decisions in software development projects, that is, considerations in the software development process that have implications on the usability of the resulting system. Who makes the decisions about the interaction design? When are they made? What are the sources of input and the main constraints? What support regarding usability matters does the decision maker have access to? The study was conducted by means of a series of semi-structured interviews with eight developers and usability people in the two organisations. The study shows that there is no simple answer to who makes the interaction design decisions and when they are made. Rather, it is a case of everybody (developers and user representatives) making this kind of decisions, all the time. Many decisions are never made, they just happen as a result of somebody coding a bit, or modelling a bit. The matter of responsibility for the interaction design was unclear, causing frustration. Use cases are the basis of all design, and thus crucial to the usability of the resulting system. But, use cases were reported to be difficult to write. 
They easily turn into ``system operations'' providing little or no support for interaction design. One conclusion is that usability requires hands-on activities throughout the project, or it ``gets lost'', this is particularly important during construction. One way of achieving continuous attention could be to incorporate a usability role in the system development process and give it sufficient status. } } @TechReport{ it:2001-020, author = {Emmanuel Beffara and Sergei Vorobyov}, title = {Adapting {G}urvich-{K}arzanov-{K}hachiyan's Algorithm for Parity Games: Implementation and Experimentation}, institution = it, department = csd, year = 2001, number = {2001-020}, month = sep, abstract = { We suggest and experimentally test a new approach to solving the \textsc{Parity Games} problem, equivalent to the \emph{propositional modal $\mu$-calculus}, a fundamental problem in automata, games, complexity theories, as well as in practical computer-aided verification, whose precise computational complexity is a long-standing open question. Based on the algorithm due to Gurvich, Karzanov, and Khachiyan for solving the more general \textsc{Mean Payoff Games}, we make an adaptation and optimization for parity games, implement it in Knuth's system CWEB of ``literate programming'' using C++. The algorithm is less known in the model-checking community and is based on different ideas, as compared to the well-investigated ideas including fixpoint iteration, strategy improvement, dynamic programming. Run on a considerable number (several millions) of randomly generated test game instances with up to 20.000 vertices and 6 -- 7 colors, the algorithm demonstrates a rapid convergence with sublinear number of iterations. Moreover, unlike other known algorithms, for which examples of exponential behaviors are known, our algorithm admits internal randomization, which gives improvements in our tests and may help the algorithm to avoid bad cases. 
Our experiments strongly suggest that the algorithm is polynomial on the average under reasonable distributions, which remains to be explained in further studies. This report contains results of the summer project done by the first author under the supervision and with participation of the second author. It represents an ongoing project, the new updates will be available from \url{http://www.csd.uu.se/~vorobyov/eb-gkk} } } @TechReport{ it:2001-019, author = {Wendy Kress and Jonas Nilsson}, title = {Boundary conditions and estimates for the linearized {N}avier-{S}tokes equations on staggered grids}, institution = it, department = tdb, year = 2001, number = {2001-019}, month = sep, abstract = {In this paper we consider the linearized Navier-Stokes equations in two dimensions under specified boundary conditions. We study both the continuous case and a discretization using a second order finite difference method on a staggered grid and derive estimates for both the analytic solution and the approximation on staggered grids. We present numerical experiments to verify our results. } } @TechReport{ it:2001-018, author = {Emad Abd-Elrady}, title = {An adaptive grid point {RPEM} algorithm for harmonic signal modeling}, institution = it, department = syscon, year = 2001, number = {2001-018}, month = aug, abstract = {Periodic signals can be modeled as a real wave with unknown period in cascade with a piecewise linear function. In this report, a recursive Gauss-Newton prediction error identification algorithm for joint estimation of the driving frequency and the parameters of the nonlinear output function parameterized in a number of adaptively estimated grid points is introduced. The Cramer-Rao bound (CRB) is derived for the suggested algorithm. 
Numerical examples indicate that the suggested algorithm gives better performance than using fixed grid point algorithms and easily can be modified to track both the fundamental frequency variations and the time varying amplitude.} } @TechReport{ it:2001-017, author = {Henrik Bj{\"o}rklund and Viktor Petersson and Sergei Vorobyov}, title = {Experiments with Iterative Improvement Algorithms on Completely Unimodal Hypercubes}, institution = it, department = csd, year = 2001, number = {2001-017}, month = aug, abstract = {Completely unimodal (i.e., having a unique local minimum on every face) numberings of many-dimensional hypercubes are abstract versions of different optimization problems, like linear programming, decision problems for games, and abstract optimization problems. In this paper we investigate and compare the behaviors of seven iterative improvement algorithms: 1) the Greedy Single Switch Algorithm (GSSA), 2) the Random Single Switch Algorithm (RSSA), 3) the All Profitable Switches Algorithm (APSA), 4) the Random Multiple Switches Algorithm (RMSA), 5) Kalai-Ludwig's Randomized Algorithm (KLRA), 6) Weighted Random Multiple Switch Algorithm (WRMSA), 7) Weighted Greedy Multiple Switch Algorithm (WGMSA). Our experiments were conducted on all completely unimodal four-dimensional hypercubes and on randomly generated hypercubes of dimensions up to sixteen, Hamiltonian (presumably corresponding to hard problem instances) and non-Hamiltonian. Local-search improvement algorithms 1 and 2 have been investigated earlier, but number 3, 4, 5, 6, and 7 probably not. Algorithm 5 (first time used for completely unimodal hypercubes in this paper) is the only algorithm with the known \emph{subexponential} expected worst-case running time. However, the algorithms 1, 3, 4, 6, 7 demonstrate superior behaviors compared to the other two investigated algorithms. This suggests that further theoretical and experimental studies of these algorithms should be carried out. 
} } @TechReport{ it:2001-016, author = {Robert Stjernstr{\"o}m}, title = {User-Centred Design of a Train Driver Display}, institution = it, department = hci, year = 2001, number = {2001-016}, month = aug, note = {M.Sc. thesis}, abstract = {This paper describes how a graphical user interface was developed in the TRAIN Project at the Department of Information Technology at Uppsala University. The TRAIN Project was initiated by The Board of Accident Investigation due to the commuter train accident in {\"A}lvsj{\"o} in 1994. The Swedish National Rail Administration was assigned to fund and manage the project, while independent researchers were assigned to carry out the research. One objective has been to investigate how the future work environment for train drivers can be improved. To accomplish this, requirements have been collected using user-centred design with rapid prototyping in Java and XML. Another objective has been to introduce user-centred design as an advantageous method for further development of the graphical user interface. The result is a prototype containing most of the requirements from the drivers in the work group. The prototype is interactive and simulates the real behaviour of a train - acceleration, retardation and the train track are modelled according to reality. The conclusions are that the prototype can be used by train drivers to refine the requirements of the system even further. Furthermore, the prototype can be used to give management a deeper understanding of what type of system the users require and an implication that the user-centred design can be favorably employed in future work.} } @TechReport{ it:2001-015, author = {Magnus Sv{\"a}rd}, title = {On coordinate transformations for summation-by-parts operators}, institution = it, department = tdb, year = 2001, number = {2001-015}, month = aug, abstract = {High order finite difference methods obeying a summation-by-parts (SBP) rule are developed for equidistant grids. 
With curvilinear grids, a coordinate transformation operator that does not destroy the SBP property must be used. We show that it is impossible to construct such an operator without decreasing the order of accuracy of the method. } } @TechReport{ it:2001-014, author = {Paul Pettersson and Sergio Yovine}, title = {Workshop on Real-Time Tools (Proceedings)}, institution = it, department = docs, year = 2001, number = {2001-014}, address = {Aalborg University, Denmark}, month = aug } @TechReport{ it:2001-013, author = {Eva Olsson and Lena Kecklund and Michael Ingre and Anders Jansson}, title = {Lokf{\"o}rarens informationsmilj{\"o} och {ATC}. Ett anv{\"a}ndarperspektiv}, institution = it, department = hci, year = 2001, number = {2001-013}, month = jun, note = {In Swedish}, abstract = {Syftet med TRAIN-projektet var att utv{\"a}rdera de trafiks{\"a}kerhetsm{\"a}ssiga effekterna av f{\"o}rarens informationsmilj{\"o} och arbetssituation. I denna rapport redovisas resultaten fr{\aa}n den enk{\"a}tunders{\"o}kning som under h{\"o}sten 1999 som unders{\"o}kte lokf{\"o}rarens informationsmilj{\"o} med s{\"a}rskilt fokus p{\aa} anv{\"a}ndningen av ATC-systemet. En sammanfattande bild av lokf{\"o}rarens arbete visar att en viktig uppgift {\"a}r att samla in och integrera information fr{\aa}n olika informationsk{\"a}llor f{\"o}r en s{\"a}ker och punktlig k{\"o}rning. ATC {\"a}r en viktig informationsk{\"a}lla vid k{\"o}rning av b{\aa}de pendelt{\aa}g och X2. Var man h{\"a}mtar sin information beror i viss utstr{\"a}ckning p{\aa} fordon och typ av arbetsuppgift. F{\"o}rarna ans{\aa}g att ATC var ett mycket viktigt hj{\"a}lpmedel och st{\"o}d i k{\"o}rningen s{\"a}rskilt under perioder av tr{\"o}tthet och f{\"o}rs{\"a}mrad uppm{\"a}rksamhet. Det fanns dock en del brister med avseende p{\aa} hur ATC presenterar information, hur olika funktioner i ATC utformats samt hur f{\"o}raren och ATC-systemet samverkar. 
Den trafiks{\"a}kerhetsm{\"a}ssiga p{\aa}verkan av dessa f{\"o}rh{\aa}llanden kommenteras i rapporten. Sammantaget kan man dock konstatera att inf{\"o}randet av ATC i Sverige inneburit en v{\"a}sentlig {\"o}kning av trafiks{\"a}kerhetsniv{\aa}n. } } @TechReport{ it:2001-012, author = {Friedhelm Stappert and Andreas Ermedahl and Jakob Engblom}, title = {Efficient Longest Executable Path Search for Programs with Complex Flows and Pipeline Effects}, institution = it, department = docs, year = 2001, number = {2001-012}, month = jun, abstract = {Knowing the Worst-Case Execution Time (WCET) of a program is necessary when designing and verifying real-time systems. A correct WCET calculation method must take into account the possible program flow, like loop iterations and function calls, as well as effects of hardware features, like caches and pipelines. In this paper we describe our work on calculating the WCET of programs with complex flow information using a path-based approach, and on how pipeline information can be efficiently included in the calculation. The result is a set of algorithms that allows for efficient and precise calculation of the WCET of programs with more complex flows and pipeline effects than was previously possible. This technical report is an extended version of the conference submission.} } @TechReport{ it:2001-011, author = {Torsten S{\"o}derstr{\"o}m and Umberto Soverini and Kaushik Mahata}, title = {Perspectives on errors-in-variables estimation for dynamic systems}, institution = it, department = syscon, year = 2001, number = {2001-011}, month = may, abstract = {The paper gives an overview of various methods for identifying dynamic errors-in-variables systems. Several approaches are classified by how the original information in time-series data of the noisy input and output measurements is condensed before further processing. 
For some methods, such as instrumental variable estimators, the information is condensed into a nonsymmetric covariance matrix as a first step before further processing. In a second class of methods, where a symmetric covariance matrix is used instead, the Frisch scheme and other bias-compensation approaches appear. When dealing with the estimation problem in the frequency domain, a milder data reduction typically takes place by first computing spectral estimators of the noisy input-output data. Finally, it is also possible to apply maximum likelihood and prediction error approaches using the original time-domain data in a direct fashion. This alternative will often require quite high computational complexity but yield good statistical efficiency. The paper is also presenting various properties of parameter estimators for the errors-in-variables problem, and a few conjectures are included, as well as some perspectives and experiences by the authors. } } @TechReport{ it:2001-010, author = {Thiemo Voigt and Per Gunningberg}, title = {Dealing with Memory-Intensive Web Requests}, institution = it, department = docs, year = 2001, number = {2001-010}, month = may, abstract = {Web servers must be protected from overload since server overload can lead to low throughput and high, unpredictable response times. In this paper we present a new admission control architecture that prevents server overload caused by high paging rates. The main idea of our admission control scheme is to delay the processing of requests when there is high memory pressure. We present experiments that show that our admission control architecture sustains high throughput even during a flash crowd of memory-intensive requests. 
} } @TechReport{ it:2001-009, author = {Johan Bengtsson}, title = {Reducing Memory Usage in Symbolic State-Space Exploration for Timed Systems}, institution = it, department = docs, year = 2001, number = {2001-009}, month = may, abstract = {One of the major problems when applying model checkers to industrial-size systems is the large memory consumption. This report addresses the problem in the context of verifiers for timed systems and presents a number of techniques that reduce the amount of memory used for state space exploration in such a tool. The methods are evaluated and compared by real-life examples and their strengths and weaknesses are described. In particular we address the memory consumption problem on two fronts, first by reducing the size of each symbolic state by means of compression and second by reducing the size of the stored state space by early inclusion checking and probabilistic methods. } } @TechReport{ it:2001-008, author = {Viktor Petersson and Sergei Vorobyov}, title = {Parity Games: Interior-Point Approach}, institution = it, department = csd, year = 2001, number = {2001-008}, month = may, abstract = {We introduce and investigate continuous optimization techniques for solving \textsc{Parity Games}, based on the \emph{interior-point paradigm}, combining barrier functions and quasi-Newton methods. These have been proven very successful for \textsc{Linear and Convex Programming}. The basic step is to leave the boundary (i.e. the stationary strategies in which such games can be solved) and to cut through the interior (i.e. probabilistic strategies) of the many-dimensional hypercube of strategies. } } @TechReport{ it:2001-007, author = {Sven Montan}, title = {Validation of Cycle-Accurate {CPU} Simulators against Real Hardware}, institution = it, department = docs, year = 2001, number = {2001-007}, month = apr, note = {M.Sc. 
thesis}, abstract = {One of the most important issues regarding real-time software performance is the worst case execution time (WCET). There are several methods for deducing the WCET statically; one such analysis system is currently under development by WCET researchers from Uppsala University in co-operation with C-lab in Paderborn. The system uses a CPU simulator for calculating the execution time of selected parts of the target program, which creates a demand for a very cycle-accurate simulator. In this thesis, we give a method for validating a CPU simulator against real hardware CPU. The proposed test method is a black-box test method that relies on hardware analysis for test-case generation. The validation method aims at both determining the accuracy of the simulator and to pinpoint simulator errors for improving the accuracy. We have tested this validation method on a NEC V850 CPU core simulator, and the results show that the average error-rate drops from 11,2\% to 1,3\% for a set of benchmark programs. } } @TechReport{ it:2001-006, author = {Erik K. Larsson and Torsten S{\"o}derstr{\"o}m}, title = {Identification of Continuous-Time {AR} Processes by Using Limiting Properties of Sampled Systems}, institution = it, department = syscon, year = 2001, number = {2001-006}, month = mar, abstract = {Consider the problem of estimating the parameters in a continuous-time autoregressive (CAR) model from discrete-time samples. In this paper a simple and computationally efficient method is introduced, and analyzed with respect to bias distribution. The approach is based on replacing the derivatives by delta approximations, forming a linear regression, and using the least squares method. It turns out that consistency can be assured by applying a particular prefilter to the data; a filter that is easy to compute and is only dependent on the order of the continuous-time system. 
As a side effect several general properties for discrete-time autoregressive moving average (ARMA) systems originating from sampled CAR-processes will also be presented. Finally, the introduced method is compared to other methods in some simulation studies. } } @TechReport{ it:2001-005, author = {Yordanos G. Beyene}, title = {Firewalls in {L}inux: Principles and Implementation}, institution = it, department = docs, year = 2001, number = {2001-005}, month = feb, note = {M.Sc. thesis}, abstract = {This paper is meant to provide techniques in building and installing a standalone packet filtering firewall in Linux machines, mainly for small sites who don't give much service to Internet users. It deals with attenuating the effect of the most common types of attacks using ipchains. It guides how to design, implement, debug, and maintain Firewall. Techniques for continuously monitoring attacks is attempted. It also give a historical, architectural and technical overview of firewalls and security attacks. } } @TechReport{ it:2001-004, author = {Bernhard M{\"u}ller and H.C. Yee}, title = {High Order Numerical Simulation of Sound Generated by the {K}irchhoff Vortex}, institution = it, department = tdb, year = 2001, number = {2001-004}, month = feb, note = {Published as RIACS Technical Report 01.02, Jan. 2001, NASA Ames Research Center, and submitted to Computing and Visualization in Science}, abstract = {An improved high order finite difference method for low Mach number computational aeroacoustics (CAA) is described. The improvements involve the conditioning of the Euler equations to minimize numerical cancellation errors, and the use of a stable non-dissipative sixth-order central spatial interior scheme and a third-order boundary scheme. Spurious high frequency oscillations are damped by a third-order characteristic-based filter. The objective of this paper is to apply these improvements in the simulation of sound generated by the Kirchhoff vortex. 
} } @TechReport{ it:2001-003, author = {Karl H{\"o}rnell and Per L{\"o}tstedt}, title = {Adaptive Iteration to Steady State of Flow Problems}, institution = it, department = tdb, year = 2001, number = {2001-003}, month = feb, abstract = {Runge-Kutta time integration is used to reach the steady state solution of discretized partial differential equations. Continuous and discrete parameters in the method are adapted to the particular problem by minimizing the residual in each step, if this is possible. Algorithms for parameter optimization are devised and analyzed. Solutions of the nonlinear Euler and Navier-Stokes equations for compressible flow illustrate the methods.} } @TechReport{ it:2001-002, author = {Johan Steensland}, title = {Dynamic Structured Grid Hierarchy Partitioners Using Inverse Space-Filling Curves}, institution = it, department = tdb, year = 2001, number = {2001-002}, month = feb, abstract = {This paper discusses partitioning of dynamic structured grid hierarchies, occurring in structured adaptive mesh refinement (SAMR) applications. When a SAMR method is executed on a parallel computer, the work load will change dynamically. Thus, there is need for dynamic load balancing. Inverse space-filling curve partitioning (ISP) is appealing for load balancing in parallel SAMR, because of its speed. In this paper, ISP is considered as \textit{part} of a partitioning approach, which combines structured and unstructured techniques. More precisely, various design decisions for the structured partitioning are considered. Different design choices lead to graphs with different properties. The main objective is to investigate how these differences affect the behavior of ISP. The paper contributes by (a)~identifying certain design choices as being advantageous, and (b) presenting four new partitioning algorithms that correspond to these design decisions. 
} } @TechReport{ it:2001-001, author = {Viktor Petersson and Sergei Vorobyov}, title = {A Randomized Subexponential Algorithm for Parity Games}, institution = it, department = csd, year = 2001, number = {2001-001}, month = jan, abstract = {We describe a randomized algorithm for \textsc{Parity Games} (equivalent to the \textsc{Mu-Calculus Model Checking}), which runs in expected time $2^{O(k^{1/(1+2\varepsilon)})}$ \emph{subexponential} in the number of colors $k$ of the game graph when $k$ is $\Omega(n^{1/2+\varepsilon})$, $n$ is the number of vertices, and $0<\varepsilon\leq 1/2$. All previously known algorithms were \emph{exponential} in the number of colors, with the best one taking time and space $O(k^2\cdot n\cdot \sqrt{n}^k)$. Our algorithm does not rely on Linear Programming subroutines and uses a low-degree polynomial space. } } @TechReport{ it:2000-035, author = {Henrik Brand{\'e}n and Sverker Holmgren}, title = {Convergence acceleration for the steady state {E}uler equations}, institution = it, department = tdb, year = 2000, number = {2000-035}, month = dec, abstract = {We consider the iterative solution of systems of equations arising from discretizations of the non-linear Euler equations governing compressible flow. The differential equation is discretized on a structured grid, and the steady state solution is computed by a time-marching method. A convergence acceleration technique based on semicirculant approximations of the difference operator or the Jacobian is used. Implementation issues and variants of the scheme allowing for a reduction of the arithmetic complexity and memory requirement are discussed. The technique can be combined with a variety of iterative solvers, but we focus on non-linear explicit Runge-Kutta time-integration schemes. The results show that the single-stage forward Euler method can be used, and the time step is not limited by a CFL-criterion. Instead it can be chosen as a constant, independent of the number of grid points. 
This results in that the arithmetic work required for computing the solution is equivalent to the work required for a fixed number of residual evaluations. Two major advantages of the semicirculant convergence acceleration technique is that it contains few tunable parameters, and that it is robust with respect to the amount of artificial viscosity used in the discretization. } } @TechReport{ it:2000-034, author = {Alexander Shen}, title = {Algorithmic Information Theory and Kolmogorov Complexity}, institution = it # {, and Independent University of Moscow, Russia}, department = csd, year = 2000, number = {2000-034}, month = dec, abstract = {This document contains lecture notes of an introductory course on Kolmogorov complexity. They cover basic notions of algorithmic information theory: Kolmogorov complexity (plain, conditional, prefix), notion of randomness (Martin-Lof randomness, Mises-Church randomness), Solomonoff universal a priori probability and their properties (symmetry of information, connection between a priori probability and prefix complexity, criterion of randomness in terms of complexity) and applications (incompressibility method in computational complexity theory, incompleteness theorems). } } @TechReport{ it:2000-033, author = {Olsson, Eva and Sandblad, Bengt and Kecklund, Lena}, title = {S{\"a}kerhetssamtal. Nul{\"a}ge och f{\"o}r{\"a}ndringsbehov}, institution = it, department = hci, year = 2000, number = {2000-033}, month = dec, note = {In Swedish}, abstract = {Denna rapport {\"a}r en sammanst{\"a}llning av fr{\aa}gor som r{\"o}r kommunikationen mellan f{\"o}rare och t{\aa}gtrafikstyrning, s{\aa} kallade samtal i s{\"a}kerhetstj{\"a}nst. Intervjuer med sju f{\"o}rare fr{\aa}n Hagalund som gjorts inom ramen f{\"o}r projektet \emph{TRAIN} (Trafiks{\"a}kerhet och informationsmilj{\"o} f{\"o}r t{\aa}gf{\"o}rare) under december 1999 utg{\"o}r underlag f{\"o}r de synpunkter p{\aa} s{\"a}kerhetssamtal som kommer fr{\aa}n f{\"o}rare. 
P{\aa} motsvarande s{\"a}tt har ett antal intervjuer med t{\aa}gledare vid flera trafikledningscentraler tidigare genomf{\"o}rts inom ramen f{\"o}r forskningsprojektet \emph{Framtida t{\aa}gtrafikstyrning}. Inom det projektet har dessutom en arbetsgrupp under en serie m{\"o}ten bl.a. diskuterat hur man ser p{\aa} dagens s{\"a}kerhetssamtal, och p{\aa} de framtida f{\"o}r{\"a}ndringar som man ser som {\"o}nskv{\"a}rda. Resultat fr{\aa}n dessa diskussioner presenteras ocks{\aa} relativt utf{\"o}rligt h{\"a}r i form av mer framskridna f{\"o}rslag till kommunikationsl{\"o}sningar. Det framkommer p{\aa} olika s{\"a}tt, b{\aa}de i diskussioner med lokf{\"o}rare och med t{\aa}gledare, att man upplever brister i det s{\"a}tt man samverkar och kommunicerar. Bristerna har olika anledning. Det finns tekniska sv{\aa}righeter och sv{\aa}righeter som ligger i kunskap, organisation och rutiner. Det framkommer ocks{\aa} tydligt att man till stor del saknar information och kunskap om varandras arbete. Detta g{\"o}r det sv{\aa}rare att f{\"o}rst{\aa} varandras arbetsrelaterade problem och att veta vilken information som man skulle kunna l{\"a}mna f{\"o}r att f{\"o}rb{\"a}ttra varandras m{\"o}jligheter att prestera bra. Man saknar ofta information och beslutsunderlag under arbetsprocesserna. Detta leder till att man agerar utg{\aa}ende fr{\aa}n den begr{\"a}nsade information man har samt att man {\"a}r tvungen att g{\"o}ra bed{\"o}mningar av den r{\aa}dande situationen. F{\"o}rb{\"a}ttrad information presenterad p{\aa} ett s{\"a}tt som st{\"o}djer f{\"o}raren i arbetet kan ge varje enskild lokf{\"o}rare den {\"o}verblick av den aktuella och kommande situationen som bidrar till att trafikfl{\"o}det optimeras. Informationen skulle inte bara {\"o}ka s{\"a}kerheten, den skulle ocks{\aa} kunna medverka till mindre stress och mer planerad k{\"o}rning f{\"o}r lokf{\"o}rarna. 
P{\aa} samma s{\"a}tt skulle situationen f{\"o}r t{\aa}gledarna kunna f{\"o}rb{\"a}ttras om dessa hade mer information om f{\"o}rh{\aa}llandena som g{\"a}ller f{\"o}r lokf{\"o}rarna, vad som sker med loken och t{\aa}gen som har inverkan p{\aa} trafiken, eventuella st{\"o}rningar m.m. Man skulle d{\aa} kunna planera trafiken utifr{\aa}n mer aktuell och fullst{\"a}ndig information, kunna f{\"o}ruts{\"a}ga kommande st{\"o}rningar och konflikter i trafiken. Detta skulle leda till b{\"a}ttre s{\"a}kerhet, effektivare trafikf{\"o}ring och ett lugnare arbete p{\aa} trafikledningscentralen. } } @TechReport{ it:2000-032, author = {Henrik Brand{\'e}n and Per Sundqvist}, title = {Preconditioners Based on Fundamental Solutions}, institution = it, department = tdb, year = 2000, number = {2000-032}, month = nov, note = {Revised version available as IT technical report 2005-001}, abstract = {We consider a new convergence acceleration technique for the iterative solution of linear systems of equations that arise when discretizing partial differential equations. The method is applied to finite difference discretizations, but the ideas and the basic theory apply to other discretizations too. If $E$ is a fundamental solution of a differential operator $P$, we have \mbox{$E\ast(Pu)=u$.} Inspired by this, we choose the preconditioner to be a discretization of the approximative inverse $K$, given by \[ (Ku)(x)=\int_{\Omega}E(x-y)u(y)dy, \qquad x\in\Omega\subset\mathds{R}^d, \] where $\Omega$ is the domain of interest. The operator $K$ is only an approximation of $P^{-1}$ since we do not integrate over all of $\mathds{R}^d$ as for the convolution, and since we impose boundary conditions. Two main advantages of this method are that we can perform analysis before we discretize the operators, and that there is a fast way of applying the preconditioner using FFT. We present analysis showing that if $P$ is a first order differential operator, $KP$ is bounded. 
The analysis also describes how $K$ differs from $P^{-1}$. Implementation aspects are considered, and numerical results show grid independent convergence for first order partial differential equations. For the second order convection-diffusion equation convergence is no longer grid independent, a result that is consistent with our theory. However, if the grid is chosen to give a fixed number of grid points within boundary layers, the number of iterations is independent of the physical viscosity parameter. } } @TechReport{ it:2000-031, author = {Anders Jansson and Eva Olsson and Lena Kecklund}, title = {Att k{\"o}ra t{\aa}g}, institution = it, department = hci, year = 2000, number = {2000-031}, month = nov, note = {In Swedish} } @TechReport{ it:2000-030, author = {Richard Carlsson and Bj{\"o}rn Gustavsson and Erik Johansson and Thomas Lindgren and Sven-Olof Nystr{\"o}m and Mikael Pettersson and Robert Virding}, title = {Core {E}rlang 1.0 language specification}, institution = it, department = csd, year = 2000, number = {2000-030}, month = nov, abstract = {We describe a core language for the concurrent functional language Erlang, aptly named ``Core Erlang'', presenting its grammar and informal static and dynamic semantics relative to Erlang. We also discuss built-in functions and other open issues, and sketch a syntax tree representation.} } @TechReport{ it:2000-029, author = {Mikael Pettersson}, title = {A staged tag scheme for {E}rlang}, institution = it, department = csd, year = 2000, number = {2000-029}, month = nov, abstract = {The runtime systems in Ericsson's implementations of the Erlang programming language, up to and including release R6B, use a simple tag scheme which allows for fast computation of an object's type. However, the tag scheme also restricts all Erlang objects to a 28- or 30-bit address space. This is problematic when Erlang is ported to new systems, and reduces reliability for applications needing large amounts of memory. 
This paper describes the development of a new \emph{staged} tag scheme, which was designed to \emph{not} restrict the range of pointers, and thus eliminate the source of the abovementioned problems. Secondary benefits also followed: the staged tag scheme is more flexible, and, perhaps surprisingly, is actually more efficient. The staged tag scheme has been integrated into Ericsson's Erlang code, and is a standard feature starting with release R7A. } } @TechReport{ it:2000-028, author = {Bharath Bhikkaji and Torsten S{\"o}derstr{\"o}m}, title = {Reduced order models for diffusion systems using singular perturbations}, institution = it, department = syscon, year = 2000, number = {2000-028}, month = nov, abstract = {In this paper, we consider a special case of the one dimensional heat diffusion across a homogeneous wall. This physical system is modeled by a linear partial differential equation, which can be thought of as an infinite dimensional dynamic system. To simulate this physical system, one has to approximate the underlying infinite order system by a finite order approximation. In this paper we first construct a simple and straightforward approximate finite order model for the true system. The proposed approximate models may require large model order to approximate the true system dynamics in the high frequency regions. To avoid the usage of higher order models, we use a scheme similar to singular perturbations to further reduce the model order. } } @TechReport{ it:2000-027, author = {Emad Abd-Elrady}, title = {Convergence of the {RPEM} as Applied to Harmonic Signal Modeling}, institution = it, department = syscon, year = 2000, number = {2000-027}, month = oct, abstract = {Arbitrary periodic signals can be estimated recursively by exploiting the fact that a sine wave passing through a static nonlinear function generates a spectrum of overtones. 
The estimated signal model is hence parameterized as a real wave with unknown period in cascade with a piecewise linear function. The driving periodic wave can be chosen depending on any prior knowledge. The performance of a recursive Gauss-Newton prediction error identification algorithm for joint estimation of the driving frequency and the parameters of the nonlinear output function is therefore studied. A theoretical analysis of local convergence to the true parameter vector as well as numerical examples are given. Furthermore, the Cramer-Rao bound (CRB) is calculated in this report. } } @TechReport{ it:2000-026, author = {Henrik Brand{\'e}n}, title = {Numerical Boundary Conditions for {ODE}}, institution = it, department = tdb, year = 2000, number = {2000-026}, month = oct, optnote = {Submitted to Numerische Mathematik}, abstract = {We consider consistent finite difference approximations of ordinary differential equations, and in particular, parasitic solutions. A framework is introduced, representing a discrete solution as a sum of the true solution and a number of parasitic solutions. We show that within this framework, finite difference equations can be analysed using theory of ordinary differential equations, simplifying the analysis considerably. As an example we give a simple recipe on how to construct numerical boundary conditions such that the solution converges with expected accuracy.} } @TechReport{ it:2000-025, author = {Huimin Lin and Wang Yi}, title = {A Complete Axiomatisation for Timed Automata}, institution = it, department = docs, year = 2000, number = {2000-025}, month = sep, note = {A short version of this paper will be included in the proceedings of 20th FST-TCS, 2000}, abstract = {Timed automata has been recognised as a fundamental model for real time systems, but it still lacks a satisfactory algebraic theory. 
This paper fills the gap by presenting a complete proof system for timed automata, in which the equalities between pairs of timed automata that are timed bisimilar can be derived. The proof of the completeness result relies on the introduction of the notion of symbolic timed bisimulation. } } @TechReport{ it:2000-024, author = {Saul Abarbanel and Adi Ditkowski and Bertil Gustafsson}, title = {On error bounds of finite difference approximations to partial differential equations - temporal behavior and rate of convergence}, institution = it, department = tdb, year = 2000, number = {2000-024}, month = sep, abstract = {This paper considers a family of spatially semi-discrete approximations, including boundary treatments, to hyperbolic and parabolic equations. We derive the dependence of the error-bounds on time as well as on mesh size.} } @TechReport{ it:2000-023, author = {Per L{\"o}tstedt and Stefan S{\"o}derberg and Alison Ramage and Lina Hemmingsson-Fr{\"a}nd{\'e}n}, title = {Implicit solution of hyperbolic equations with space-time adaptivity}, institution = it, department = tdb, year = 2000, number = {2000-023}, month = sep, abstract = {Adaptivity in space and time is introduced to control the error in the numerical solution of hyperbolic partial differential equations. The equations are discretised by a finite volume method in space and an implicit linear multistep method in time. The computational grid is refined in blocks. At the boundaries of the blocks, there may be jumps in the step size. Special treatment is needed there to ensure second order accuracy and stability. The local truncation error of the discretisation is estimated and is controlled by changing the step size and the time step. The global error is obtained by integration of the error equations. In the implicit scheme, the system of linear equations at each time step is solved iteratively by the GMRES method. Numerical examples executed on a parallel computer illustrate the method. 
} } @TechReport{ it:2000-022, author = {Johan Runeson and Sven-Olof Nystr{\"o}m and Jan Sj{\"o}din}, title = {Optimizing Code Size through Procedural Abstraction}, institution = it, department = csd, year = 2000, number = {2000-022}, month = sep, note = {An extended abstract appears in the Proceedings of the ACM SIGPLAN 2000 Workshop on Languages, Compilers, and Tools for Embedded Systems (LCTES'2000)}, abstract = {Memory size is an important economic factor in the development of embedded systems. It is therefore desirable to find compiler optimization techniques that reduce the size of the generated code. One such code compression technique is procedural abstraction, where repeated occurrences of equivalent code fragments are factored out into new subroutines. Previously, procedural abstraction has been applied at the machine code level in optimizing linkers or binary rewriting tools. We investigate the prospects of applying procedural abstraction at the intermediate code level of a whole-program optimizing C compiler. Placing the optimization before register allocation makes it target independent, and will allow us to take full advantage of powerful code selection, register allocation and code scheduling techniques. } } @TechReport{ it:2000-021, author = {Parosh Aziz Abdulla and Aletta Nyl{\'e}n}, title = {{BQO}s and Timed Petri Nets}, institution = it, department = docs, year = 2000, number = {2000-021}, month = aug, abstract = {In this paper, we use the theory of better quasi-orderings to define a methodology for inventing constraint systems which are both well quasi-ordered and compact. 
We apply our methodology by presenting new constraint systems for verification of systems with unboundedly many real-valued clocks, and use them for checking safety properties for lazy (non-urgent) timed Petri nets where each token is equipped with a real-valued clock.} } @TechReport{ it:2000-020, author = {Karl H{\"o}rnell and Per L{\"o}tstedt}, title = {Time step selection for shock problems}, institution = it, department = tdb, year = 2000, number = {2000-020}, month = aug, abstract = {The solution to a conservation law is integrated in time by an embedded Runge-Kutta method. The time steps are chosen so that a bound on the local error is satisfied. At discontinuities such as shocks in the solution the time step is too pessimistic. By filtering the error estimate the time steps are determined by the smooth parts of the solution. The technique is justified theoretically and in numerical experiments. } } @TechReport{ it:2000-019, author = {Torsten S{\"o}derstr{\"o}m and Bharath Bhikkaji}, title = {Reduced order models for diffusion systems}, institution = it, department = syscon, year = 2000, number = {2000-019}, month = aug, abstract = {Mathematical models for diffusion processes like heat propagation, dispersion of pollutants etc., are normally partial differential equations which involve certain unknown parameters. To use these mathematical models as the substitutes of the true system, one has to determine these parameters. Partial differential equations (PDE) of the form \bea \frac{\partial u(x,t)}{\partial t} = \mathcal{L} u(x,t) \label{eq1.1} \eea where $ \mathcal{L}$ is a linear differential (spatial) operator, describe infinite dimensional dynamical systems. To compute a numerical solution for such partial differential equations, one has to approximate the underlying system by a finite order one. By using this finite order approximation, one then computes an approximate numerical solution for the PDE. 
We consider a simple case of heat propagation in a homogeneous wall. The resulting partial differential equation, which is of the form (\ref{eq1.1}), is approximated by finite order models by using certain existing numerical techniques like Galerkin and Collocation etc. These reduced order models are used to estimate the unknown parameters involved in the PDE, by using the well developed tools of system identification. In this paper we concentrate more on the model reduction aspects of the problem. In particular, we examine the model order reduction capabilities of the Chebyshev polynomial methods used for solving partial differential equation. } } @TechReport{ it:2000-018, author = {Torsten S{\"o}derstr{\"o}m and Bharath Bhikkaji}, title = {Reduced order models for diffusion systems via collocation methods}, institution = it, department = syscon, year = 2000, number = {2000-018}, month = aug, abstract = {Mathematical models for diffusion processes like heat propagation, dispersion of pollutants etc., are normally partial differential equations involving unknown parameters. For practical use, one has to estimate these parameters. In this paper we consider a simple case of heat propagation in a homogeneous wall. The resulting partial differential equation is solved using numerical techniques and tools of system identification are used to estimate the unknown parameters. In particular we examine the effect of model order selection when a Chebyshev collocation method is applied for solving partial differential equations. } } @TechReport{ it:2000-017, author = {Arne Andersson and Per Carlsson and Fredrik Ygge}, title = {Resource Allocation With Noisy Functions}, institution = it, department = csd, year = 2000, number = {2000-017}, month = aug, abstract = {We consider resource allocation with separable objective functions defined over subranges of the integers. 
While it is well known that (the maximisation version of) this problem can be solved efficiently if the objective functions are concave, the general problem of resource allocation with functions that are not necessarily concave is difficult. In this article we show that for a large class of problem instances with noisy objective functions the optimal solutions can be computed efficiently. We support our claims by experimental evidence. Our experiments show that our algorithm in hard and practically relevant cases runs up to 40 - 60 times faster than the brute force testing of all possible solutions. } } @TechReport{ it:2000-016, author = {Ken Mattsson}, title = {Imposing Boundary Conditions with the Injection, the Projection and the Simultaneous Approximation Term Method}, institution = it, department = tdb, year = 2000, number = {2000-016}, month = jul, abstract = {Four different methods of imposing boundary conditions for the linear advection-diffusion equation and a linear hyperbolic system are considered. The methods are analyzed using the energy method and the Laplace transform technique. Numerical calculations are done, considering in particular the case when the initial data and boundary data are inconsistent. } } @TechReport{ it:2000-015, author = {Petre Stoica and Girish Ganesan}, title = {On a Parameter Estimation Algorithm for {MA} Time Series.}, institution = it, department = syscon, year = 2000, number = {2000-015}, month = jun, abstract = {We take a close look at an algorithm for MA(1) parameter estimation. 
We provide a thorough convergence analysis of that algorithm in the scalar case, discuss its main advantages and disadvantages, present a possible extension of it to the MA($n$) case, and compare it with a recent alternative algorithm that appears to be one of the best available choices for MA parameter estimation.} } @TechReport{ it:2000-014, author = {Sergei Vorobyov}, title = {Better Decision Algorithms for Parity Games and the Mu-Calculus Model Checking}, institution = it, department = csd, year = 2000, number = {2000-014}, month = jun, abstract = {We suggest an algorithm with the asymptotically best behavior among currently known algorithms for the problems enumerated in the title, when the number of alternations $k$ is $\Omega(n^{\frac{1}{2}+\varepsilon})$, where $n$ is the number of states and $0<\varepsilon\leq\frac{1}{2}$. The best previously known algorithm \cite{BrowneClarkeJhaLongMarrero97} runs in time $O(k^2\cdot n\cdot \sqrt{n}^k)$ and uses approximately the same space. For comparison, our algorithm for $k=n$ (the most difficult case) runs in time $O(n^3\cdot (1.61803)^k)$ and uses a small polynomial space. We also show, for the first time, that there is a \emph{subexponential} randomized algorithm for the problem when $k=\Omega(n^{\frac{1}{2}+\varepsilon})$. It was an open problem as to whether such algorithms exist at all. } } @TechReport{ it:2000-013, author = {Marcus Johansson}, title = {Loss of High Order Spatial Accuracy Due to Boundary Error Caused by Runge-Kutta Time Integration}, institution = it, department = tdb, year = 2000, number = {2000-013}, month = may, abstract = {Explicit Runge--Kutta methods have frequently been used for solving initial boundary value problems with the method of lines. For linear and certain non-linear problems like the inviscid Burgers' equation, the correct specification of Dirichlet boundary conditions at the intermediate Runge--Kutta stages can be derived analytically. 
For general non-linear PDEs and general boundary conditions, it is currently not known how to find consistent analytical boundary conditions that do not lower the formal accuracy of the scheme. There are some numerical approaches that gain full accuracy but lead to deteriorated stability conditions. Here we focus on solving non-linear wave like equations using high-order finite difference methods. We examine the properties of an inconsistent boundary treatment and make comparisons with a correct one when applicable. We examine the effect of introducing viscosity. We contrast fourth order Runge--Kutta and Adams--Bashforth time integrators.} } @TechReport{ it:2000-012, author = {Maria Karlsson and Fredrik Ygge}, title = {Market-based Approaches to Optimization}, institution = it, department = csd, year = 2000, number = {2000-012}, month = may, abstract = {The use of markets has been proposed for a number of resource allocation/optimization problems, as such approaches often have a number of conceptual advantages. However, most examples found in the literature are rather ad hoc. In this article we present a general definition of what constitutes a market-oriented approach to optimization. We demonstrate how this general framework can be used to conceptually improve two well-known approaches from the literature, and discuss computational properties of the different approaches. We also present some general theory and show that the theories of the two approaches under investigation are special cases of this theory. } } @TechReport{ it:2000-011, author = {Arne Andersson and Fredrik Ygge}, title = {Managing Large Scale Computational Markets}, institution = it, department = csd, year = 2000, number = {2000-011}, month = apr, abstract = {General equilibrium theory has been proposed for resource allocation in computational markets. 
The basic procedure is that agents submit bids and that a resource (re)allocation is performed when a set of prices (one for each commodity) is found such that supply meets demand for each commodity. For successful implementation of large markets based on general equilibrium theory, efficient algorithms for finding the equilibrium are required. We discuss some drawbacks of current algorithms for large scale equilibrium markets and present a novel distributed algorithm, CoTree, which deals with the most important problems. CoTree is communication sparse, fast in adapting to preference changes of a few agents, has minimal requirements on local data, and is easy to implement. } } @TechReport{ it:2000-010, author = {Jakob Carlstr{\"o}m}, title = {Efficient Approximation of Values in Gain scheduled Routing}, institution = it, department = docs, year = 2000, number = {2000-010}, month = apr, abstract = {This paper shows how the cost of computation and memory of a previously proposed routing algorithm can be reduced. The routing algorithm, intended for communications networks carrying multiple call classes, is based on gain scheduling of precomputed relative value functions. Each relative value function is computed by solving a reinforcement learning problem, and defines a link admission control policy. We propose a method for automatically selecting points in a grid of per-class arrival intensities, for which relative value functions are computed. After construction of the grid, relative values are computed by interpolation. The numerical studies of routing in a network with two call classes show that less than 30 relative value functions are needed to avoid performance degradation. 
} } @TechReport{ it:2000-009, author = {Jakob Carlstr{\"o}m and Ernst Nordstr{\"o}m}, title = {Gain Scheduled Routing in Multi-Service Networks}, institution = it, department = docs, year = 2000, number = {2000-009}, month = apr, abstract = {This paper presents gain scheduled routing, a new scheme for reward-maximization in multi-service loss networks. Per-link control policies and their expected future rewards are precomputed using Markov decision theory and stored in lookup tables. Selection of appropriate table entries is based on measured or predicted call arrival rates. An approximation is introduced, which makes it possible to use identical lookup tables on links having the same capacities, although the links carry calls with different link reward parameters. Simulations with Poisson arrival processes show that gain scheduled routing offers significant improvement of the average reward rate, compared to least loaded path routing. Promising results are also obtained in simulations with self-similar call traffic. } } @TechReport{ it:2000-008, author = {Jakob Carlstr{\"o}m}, title = {Predictive Gain Scheduling for Control of Self-Similar Call Traffic in Broadband Networks}, institution = it, department = docs, year = 2000, number = {2000-008}, month = apr, abstract = {This paper presents and evaluates a gain scheduling approach to solving the admission control and routing problems for self-similar call arrival processes. The control problem is decomposed into two sub-problems: prediction of near-future call arrival rates and computation of control policies for Poisson arrival processes. At decision time, the predicted arrival rates are used to select one of the control policies. The rate predictions are made by neural networks, trained on-line, and the control policies are computed using standard techniques for Markov decision processes. 
In simulations, this method achieves higher link utilization than methods which do not exploit the memory of the arrival process. It also adapts to the network traffic considerably faster than a previously presented controller employing reinforcement learning without decomposition of the problem. } } @TechReport{ it:2000-007, author = {Lars Ferm and Per L{\"o}tstedt}, title = {On numerical errors in the boundary conditions of the {E}uler equations}, institution = it, department = tdb, year = 2000, number = {2000-007}, month = apr, abstract = {Numerical errors in solution of the Euler equations of fluid flow are studied. The error equations are solved to analyze the propagation of the discretization errors. In particular, the errors caused by the boundary conditions and their propagation are investigated. Errors generated at a wall are transported differently in subsonic and supersonic flow. Accuracy may be lost due to the accumulation of errors along walls. This can be avoided by increasing the accuracy of the boundary conditions. Large errors may still arise locally at the leading edge of a wing profile. There, a fine grid is the best way to reduce the error. } } @TechReport{ it:2000-006, author = {Elisabeth Larsson and Sverker Holmgren}, title = {A Parallel Domain Decomposition Method for the {H}elmholtz Equation}, institution = it, department = tdb, year = 2000, number = {2000-006}, month = apr, abstract = {A parallel solver for the Helmholtz equation in a domain consisting of layers with different material properties is presented. A fourth-order accurate finite difference discretization is used. The arising system of equations is solved with a preconditioned Krylov subspace method. A domain decomposition framework is employed, where fast transform subdomain preconditioners are used. 
Three ways of treating the Schur complement of the preconditioner are investigated, and the corresponding preconditioned iterative methods are compared with a standard direct method. It is noted that the convergence rate of the iterative methods is closely related to how the Schur complement system for the preconditioner is formed, and how accurately it is solved. However, in almost all cases, the gain in both memory requirements and arithmetic complexity is large compared with the direct method. Furthermore, the gain increases with problem size, allowing problems with many unknowns to be solved efficiently. The efficiency is further improved by parallelization using message-passing, enabling us to solve even larger Helmholtz problems in less time.} } @TechReport{ it:2000-005, author = {P{\"a}r Samuelsson and Bengt Carlsson}, title = {Feedforward control of the external carbon flow rate in an activated sludge process}, institution = it, department = syscon, year = 2000, number = {2000-005}, month = mar, note = {A short version will appear at the 1st World Congress of the IWA, Paris 3-7 July 2000}, abstract = {Biological nitrogen removal in an activated sludge process is obtained by two biological processes; nitrification and denitrification. Denitrifying bacterias need sufficient amounts of readily metabolized carbon. The objective of this paper is to develop an automatic control strategy for adjusting the external carbon flow rate so that the nitrate concentration in the last anoxic compartment is kept at a low pre-specified level. A simple model based feedforward control combined with a standard feedback PI controller is suggested. The feedforward part is based on a steady state analysis of a simplified ASM1 model. 
Simulation results show that the suggested controller, despite being simple, effectively attenuates process disturbances.} } @TechReport{ it:2000-004, author = {Bengt G{\"o}ransson and Jan Gulliksen}, title = {Anv{\"a}ndarcentrerad systemutveckling}, institution = it, department = hci, year = 2000, number = {2000-004}, month = mar, note = {In Swedish. Also available as report TRITA-NA-D~0005, CID-71, KTH, Stockholm, Sweden. An extended and revised version is available in the book \emph{Anv{\"a}ndarcentrerad systemdesign}, see \url{http://acsd.hci.uu.se}}, abstract = {Denna rapport beskriver grunderna i anv{\"a}ndarcentrerad systemutveckling och hur detta f{\"o}rh{\aa}ller sig till ett antal systemutvecklingsmodeller. Den beskriver anv{\"a}ndarmedverkan, projektstyrningsaspekter, metoder och roller i arbetet. Den relaterar hela tiden till det arbete som vi bedrivit tillsammans med Riksskatteverket och rapporterar ocks{\aa} om ett antal observationer som gjorts under de fem {\aa}r av samverkan med Riksskatteverket som vi haft. S{\"a}rskild tonvikt har lagts p{\aa} att anv{\"a}ndarmedverkan skall vara effektiv i systemutvecklingsprocessen. Syftet med denna rapport {\"a}r att f{\"o}rklara varf{\"o}r ett anv{\"a}ndarcentrerat syns{\"a}tt {\"a}r n{\"o}dv{\"a}ndigt i organisationen, vad man vinner p{\aa} detta och vilka fallgropar man skall undvika. Det kan tj{\"a}na som ett underlag f{\"o}r att bedriva ett anv{\"a}ndarcentrerat utvecklingsprojekt, men ocks{\aa} som underlag/krav f{\"o}r upphandling av en systemutvecklingsmodell och vad en s{\aa}dan modell b{\"o}r kompletteras med f{\"o}r att bli anv{\"a}ndarcentrerad. Den riktar sig till verksamhetsf{\"o}retr{\"a}dare och it-strateger p{\aa} l{\aa}g som h{\"o}g niv{\aa}. 
} } @TechReport{ it:2000-003, author = {Lars Ferm and Per L{\"o}tstedt}, title = {Adaptive error control for steady state solutions of inviscid flow}, institution = it, department = tdb, year = 2000, number = {2000-003}, month = feb, abstract = {The steady state solution of the Euler equations of inviscid flow is computed by an adaptive method. The grid is structured and is refined and coarsened in predefined blocks. The equations are discretized by a finite volume method. Error equations, satisfied by the solution errors, are derived with the discretization error as the driving right hand side. An algorithm based on the error equations is developed for errors propagated along streamlines. Numerical examples from two-dimensional compressible and incompressible flow illustrate the method. } } @TechReport{ it:2000-002, author = {Biniam Gebremichael and Kidane Yemane}, title = {Linux Networking}, institution = it, department = docs, year = 2000, number = {2000-002}, month = jan, note = {M.Sc. thesis}, abstract = {Educational institutions and other groups with limited resources can enjoy the full feature of the Unix operating system by using the free operating system Linux. In this thesis we present a brief discussion on the most popular operating systems in use with a special emphasis on Linux. We provide information and guidelines starting from Linux installation all the way to setting up a Local Area Network (LAN). 
Included is a discussion and configuration of a dial-up network, a wireless network and LAN security.} } @TechReport{ it:2000-001, author = {Richard Carlsson}, title = {Extending {E}rlang with structured module packages}, institution = it, department = csd, year = 2000, number = {2000-001}, month = jan, abstract = {This article describes a way to extend \textsc{\mbox{Erlang}} with structured program module packages, in a simple, straightforward and useful way.} } @TechReport{ it:1999-016, author = {Arne Andersson and Mattias Tenhunen and Fredrik Ygge}, title = {Integer Programming for Automated Auctions}, institution = it, department = csd, year = 1999, number = {1999-016}, month = nov, note = {Final version published at ICMAS-00 available from \url{http://www.csd.uu.se/~arnea/abs/icmas00.html}}, abstract = {Auctions allowing bids for combinations of items are important for (agent mediated) electronic commerce; compared to other auction mechanisms, they often increase the efficiency of the auction, while keeping risks for bidders low. The determination of an optimal winner combination in this type of auctions is a complex computational problem, which has recently attracted some research, and in this paper, we look further into the topic. It is well known that the winner determination problem for a certain class of auctions is equivalent to what in the operations research community is referred to as (weighted) set packing. In this paper we compare some of the recent winner determination algorithms to traditional set packing algorithms, and study how more general auctions can be modeled by use of standard integer programming methods. 
} } @TechReport{ it:1999-015, author = {Bertil Gustafsson and Jonas Nilsson}, title = {Boundary Conditions and Estimates for the Steady Stokes Equations on Staggered Grids}, institution = it, department = tdb, year = 1999, number = {1999-015}, month = nov, abstract = {We consider the steady state Stokes equations, describing low speed flow and derive estimates of the solution for various types of boundary conditions. We formulate the boundary conditions in a new way, such that the boundary value problem becomes non-singular. By using a difference approximation on a staggered grid we are able to derive a non-singular approximation in a direct way. Furthermore, we derive the same type of estimates as for the continuous case. Numerical experiments confirm the theoretical results. } } @TechReport{ it:1999-014, author = {Bertil Gustafsson}, title = {The {G}odunov-{R}yabenkii condition: The beginning of a new stability theory}, institution = it, department = tdb, year = 1999, number = {1999-014}, abstract = {The analysis of difference methods for initial-boundary value problems was difficult during the first years of the development of computational methods for PDE. The Fourier analysis was available, but of course not sufficient for nonperiodic boundary conditions. The only other available practical tool was an eigenvalue analysis of the evolution difference operator Q. Actually, there were definitions presented, that defined an approximation as stable if the eigenvalues of Q were inside the unit circle for a fixed step-size h. In the paper ``Special criteria for stability for boundary-value problems for non-self-adjoint difference equations'' by S.K. Godunov and V.S. Ryabenkii in 1963, the authors presented an analysis of a simple difference scheme that clearly demonstrated the shortcomings of the eigenvalue analysis. They also gave a new definition of the spectrum of a family of operators, and stated a new necessary stability criterion. 
This criterion later became known as the Godunov-Ryabenkii condition, and it was the first step towards a better understanding of initial-boundary value problems. The theory was later developed in a more general manner by Kreiss and others, leading to necessary and sufficient conditions for stability. In this paper we shall present the contribution by Godunov and Ryabenkii, and show the connection to the general Kreiss theory.}, month = nov } @TechReport{ it:1999-013, author = {Huimin Lin and Wang Yi}, title = {A Proof System for Timed Automata}, institution = it, department = docs, year = 1999, number = {1999-013}, month = nov, note = {To be included in the proceedings of FOSSACS'00}, abstract = {A proof system for timed automata is presented, based on a CCS-style language for describing timed automata. It consists of the standard monoid laws for bisimulation and a set of inference rules. The judgements of the proof system are \emph{conditional equations} of the form $\phi\rhd t=u$ where $\phi$ is a clock constraint and $t$, $u$ are terms denoting timed automata. It is proved that the proof system is complete over the recursion-free subset of the language. The completeness proof relies on the notion of \emph{symbolic timed bisimulation}. Two variations of the axiomatisation are also discussed, one on timed automata by associating an invariant constraint to each node and the other on bisimulation by abstracting away delay transitions.} } @TechReport{ it:1999-012, author = {Erik G. Larsson and Petre Stoica}, title = {Array Signal Processing with Incomplete Data}, institution = it, department = syscon, year = 1999, number = {1999-012}, month = nov, abstract = {This paper considers the problem of estimating the Direction-of-Arrival (DOA) of one or more signals using an array of sensors, where some of the sensors fail to work before the measurement is completed. Methods for estimating the array output covariance matrix are discussed. 
In particular, the Maximum-Likelihood estimate of this covariance matrix and its asymptotic accuracy are derived and discussed. Different covariance matrix estimates are used for DOA estimation together with the MUSIC algorithm and with a covariance matching technique. In contrast to MUSIC, the covariance matching technique can utilize information on the estimation accuracy of the array covariance matrix, and it is demonstrated that this yields a significant performance gain. } } @TechReport{ it:1999-008, author = {Bj{\"o}rn Victor and Wang Yi (Eds.)}, title = {Proceedings of the 11th Nordic Workshop on Programming Theory}, institution = it, department = docs, year = 1999, number = {1999-008}, month = oct, url = {http://www.it.uu.se/research/publications/reports/1999-008/nwpt99/proceedings/} } @TechReport{ it:1999-007, author = {Andreas Jakobsson and Marple, Jr., S. Lawrence and Petre Stoica}, title = {Two-Dimensional Capon Spectrum Analysis}, institution = it, department = syscon, year = 1999, number = {1999-007}, month = sep, abstract = {We present a computationally efficient algorithm for computing the 2-D Capon spectral estimator. The implementation is based on the fact that the 2-D data covariance matrix will have a \emph{Toeplitz-Block-Toeplitz structure}, with the result that the inverse covariance matrix can be expressed in closed form by using a special case of the Gohberg-Heinig formula that is a function of strictly the forward 2-D prediction matrix polynomials. Furthermore, we present a novel method, based on a 2-D lattice algorithm, to compute the needed forward prediction matrix polynomials and discuss the difference in the so-obtained 2-D spectral estimate as compared to the one obtained by using the prediction matrix polynomials given by the Whittle-Wiggins-Robinson algorithm. 
Numerical simulations illustrate the clear computational gain in comparison to both the well-known classical implementation and the method recently published by Liu et al.} } @TechReport{ it:1999-006, author = {Andreas Gustavsson and Mattias Ersson}, title = {Formalizing the Intent of Design Patterns. An Approach Towards a Solution to the Indexing Problem}, institution = it, department = docs, year = 1999, number = {1999-006}, month = jul, abstract = {The \emph{intent} section of a pattern description is written in easily understood, natural language, which unfortunately has the drawback of being too imprecise and unstructured for certain applications of the \emph{intent} section. We will in this essay try to formalize the intent section of a pattern description. Our aim will be to find a structure within the \emph{intent} description that will reduce ambiguities and at the same time make the classification of patterns easier. The classifications of patterns addresses the problem of ``labeling'' patterns into one of the following categories: Creational, Structural or Behavioural. Succeeding in classifying patterns by the \emph{intent} does require that enough information for doing so is contained in the one to two sentences that make up the \emph{intent}. Whether this is the case or not will be discussed in the essay. A formalized \emph{intent} section of a pattern description can not replace the understandability of the natural language description but can be thought of as a complement to the standard structure to patterns today.} } @TechReport{ it:1999-005, author = {Anders Jansson and Erik Lindberg and Eva Olsson}, title = {Trafiks{\"a}kerhet och informationsmilj{\"o} i t{\aa}gf{\"o}rarsystemet. 
Litteratur{\"o}versikt och olycksfallsanalyser}, institution = it, department = hci, year = 1999, number = {1999-005}, month = jun, note = {In Swedish}, abstract = {This literature survey focuses on studies of the train driver system and its connections to other parts of the larger train traffic system. The main part of the studies concern Swedish conditions, but other references are included as well. In the first part, different ways of analysing accidents are discussed, including organisational aspects. In the second part, research results from different areas, all assumed to be relevant to the train driver task, are aggregated into a body of knowledge about the train driver system. Comparisons are made between the train driver task and other operator tasks. Further, the effects of a higher degree of automation are discussed. The task of driving a train is modelled as a dynamic decision task, where the driver's mental representation of the technical system and the optic flow of information are assumed to be important parts of the train driver task. In the last part of the literature survey, the content of 40 accident reports is discussed. Further, an analysis and a classification of Swedish train accidents are made by using CREAM (Hollnagel, 1998). The content of the reports, as well as the CREAM-method are evaluated, and different classification criteria are discussed.} } @TechReport{ it:1999-004, author = {Amnon H. Eden and Yoram Hirshfeld and Amiram Yehudai}, title = {Towards a Mathematical Foundation for Design Patterns}, institution = it # {, and Tel Aviv University}, department = docs, year = 1999, number = {1999-004}, month = may, abstract = {We identify a compact and sufficient set of building blocks which constitute most design patterns of the GoF catalog: uniform sets of classes or functions, function clans, class hierarchies, and regularities (morphisms) thereof. 
The abstractions observed are manifested within a model in symbolic logic and defined in LePUS, a declarative, higher order language. LePUS formulae concisely prescribe design patterns in a general, complete, and accurate manner. We provide a natural, condensed graphic notation for every LePUS formula and demonstrate how design patterns are faithfully portrayed by diagrams in this notation. We conclude by defining refinement (specialization) between patterns by means of predicate calculus and illustrate how the logical formalism of LePUS facilitates tool support for the recognition and implementation of design patterns.} } @TechReport{ it:1999-003, author = {Torsten S{\"o}derstr{\"o}m and Susanne Halvarsson}, title = {Parameter Estimation for Diffusion Models}, institution = it, department = syscon, year = 1999, number = {1999-003}, month = may, abstract = {In many applications, for example in heat diffusion and in flow problems, it is important to describe the process behavior inside the particular medium. An example can be the strive for estimating certain parameters related to the material. This paper describes how the diffusion, modeled by a partial differential equation, can be solved using numerical methods and how results from the field of system identification can be utilized in order to estimate the parameters of interest.} } @TechReport{ it:1999-002, author = {Kristina Lundqvist and Lars Asplund}, title = {A Formal Model of a Ravenscar-Compliant Run-Time Kernel and Application Code}, institution = it, department = docs, year = 1999, number = {1999-002}, month = may, abstract = {The Ravenscar tasking profile for Ada95 has been designed to allow implementation of safety critical systems in Ada. Ravenscar defines a tasking run-time system with deterministic behaviour and low complexity. We provide a formal model of the primitives provided by Ravenscar. 
This formal model can be used to verify safety properties of applications targeting a Ravenscar-compliant run-time system. As an illustration of this, we model a sample application and formally verify its correctness using the real-time model checker UPPAAL.} } @TechReport{ it:1999-001, author = {Torsten S{\"o}derstr{\"o}m}, title = {Perturbation Results for Singular Values}, institution = it, department = syscon, year = 1999, number = {1999-001}, month = apr, abstract = {The singular values of a perturbed complex-valued matrix $A+\epsilon B + O(\epsilon^2)$ are shown to have singular values of the form $\sigma_i(\epsilon)=\sigma_i + k_i\epsilon + O(\epsilon^2)$. Explicit expressions for the $k_i$ coefficients are derived. The cases of zero singular values as well as multiple singular values are included in the analysis.} }