@techreport{TD:100561,
	att_abstract={{We study scheduling algorithms for loading data feeds into real time data 
warehouses, which are used in applications such as IP network monitoring, online financial 
trading, and credit card fraud detection. In these applications, the warehouse 
collects a large number of streaming data feeds that are generated by external 
sources and arrive asynchronously.  Data for each table in the warehouse are generated at a 
constant rate, different tables possibly at different rates.
For each data feed, the arrival of new data triggers an emph{update} that seeks to append 
the new data to the corresponding table; if multiple updates are pending for the 
same table, they are batched together before being loaded. 
At time $	au$, if a table has been updated with information up to time $r le 	au$, 
its emph{staleness} is defined as $	au - r$.  

Our first objective is to schedule the updates on one or more processors in a way that 
minimizes the total staleness.
In order to ensure fairness, our second objective is to limit the maximum ``stretch'',
which we define (roughly) as the 
ratio between the duration of time an update waits till it is
finished being processed,
and the length of the update.

In contrast to earlier work proving the nonexistence of constant-competitive algorithms 
for related scheduling problems, we prove that emph{any} online nonpreemptive algorithm, 
no processor of which is ever voluntarily idle, incurs a staleness at most a constant 
factor larger than an obvious lower bound on total staleness (provided that the processors 
are sufficiently fast). 
We give a constant-stretch algorithm,
provided that the processors are sufficiently fast,
for the emph{quasiperiodic model}, 
in which tables can be clustered into a few groups such that the update frequencies 
within each group vary by at most a constant factor.
Finally, we show that our constant-stretch algorithm is also
constant-competitive (subject to the same proviso on processor speed)
in the quasiperiodic model with respect to total weighted staleness, where tables 
are assigned weights that reflect their priorities.}},
	att_authors={lg1173, mh0357, hk1971},
	att_categories={},
	att_copyright={{Springer Science+Business Media}},
	att_copyright_notice={{The definitive version was published in Theory of Computing Systems Journal. {{, 2011-07-01}}
}},
	att_donotupload={},
	att_private={false},
	att_projects={},
	att_tags={},
	att_techdoc={true},
	att_techdoc_key={TD:100561},
	att_url={http://web1.research.att.com:81/techdocs_downloads/TD:100561_DS1_2011-06-08T18:35:44.639Z.pdf},
	author={Lukasz Golab and MohammadHossein Bateni and Mohammad Hajiaghayi and Howard Karloff},
	institution={{Theory of Computing Systems Journal}},
	month={July},
	title={{Scheduling to Minimize Staleness and Stretch in Real-Time Data Warehouses}},
	year=2011,
}