--- volumes.c.orig	2011-10-07 16:50:04.000000000 +0200
+++ volumes.c	2011-11-16 23:49:08.097085568 +0100
@@ -2329,6 +2329,8 @@ static int __btrfs_alloc_chunk(struct bt
 	u64 stripe_size;
 	u64 num_bytes;
 	int ndevs;
+	u64 fs_total_avail;
+	int opt_ndevs;
 	int i;
 	int j;
 
@@ -2404,6 +2406,7 @@ static int __btrfs_alloc_chunk(struct bt
 	 * about the available holes on each device.
 	 */
 	ndevs = 0;
+	fs_total_avail = 0;
 	while (cur != &fs_devices->alloc_list) {
 		struct btrfs_device *device;
 		u64 max_avail;
@@ -2448,6 +2451,7 @@ static int __btrfs_alloc_chunk(struct bt
 		devices_info[ndevs].total_avail = total_avail;
 		devices_info[ndevs].dev = device;
 		++ndevs;
+		fs_total_avail += total_avail;
 	}
 
 	/*
@@ -2456,6 +2460,20 @@ static int __btrfs_alloc_chunk(struct bt
 	sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
 	     btrfs_cmp_device_info, NULL);
 
+	/*
+	 * Do not allocate space on all devices; instead balance free space
+	 * across devices to maximise space utilization.  (This needs tweaking
+	 * if parity RAID gets implemented: for n parity, ignore the first n
+	 * devs (after sort) in the sum and in the division.)
+	 */
+	opt_ndevs = fs_total_avail / devices_info[0].total_avail;
+	if (opt_ndevs >= ndevs)
+		opt_ndevs = ndevs - 1; //optional, might be used for faster dev remove?
+	if (opt_ndevs < devs_min)
+		opt_ndevs = devs_min;
+	if (ndevs > opt_ndevs)
+		ndevs = opt_ndevs;
+
 	/* round down to number of usable stripes */
 	ndevs -= ndevs % devs_increment;
 
