--- hvirtual-cvs/cinelerra/maskengine.C	2003-10-05 00:06:15.000000000 +0200
+++ hvirtual-1.1.8/cinelerra/maskengine.C	2004-01-24 00:56:47.000000000 +0100
@@ -9,6 +9,28 @@
 #include <stdint.h>
 #include <string.h>
 
+
+// Elapsed wall-clock time since *start_time, in microseconds.
+//
+// start_time: a timestamp previously filled in by gettimeofday().
+// The current time is sampled with gettimeofday() on every call.
+int64_t get_difference(struct timeval *start_time)
+{
+	const int64_t usec_per_sec = 1000000;
+	struct timeval now;
+
+	gettimeofday(&now, 0);
+
+// Take both field differences in 64 bits; a negative microsecond difference
+// is absorbed by the sum, so no explicit borrow from the seconds is needed.
+	int64_t whole_sec = (int64_t)now.tv_sec - (int64_t)start_time->tv_sec;
+	int64_t frac_usec = (int64_t)now.tv_usec - (int64_t)start_time->tv_usec;
+
+	return whole_sec * usec_per_sec + frac_usec;
+}
+
+
+
 MaskPackage::MaskPackage()
 {
 	apply_mutex = new Mutex;
@@ -48,39 +70,25 @@
 
 
 
+short row_spans[OVERSAMPLE*576][2000]; // one row per oversampled scanline: [0] = next free slot, [1] = capacity (reused as the read cursor P by the fill pass), [2..] = edge x positions. NOTE(review): file-scope and shared - confirm only one thread rasterises at a time, and that frames never exceed 576 lines
+int total_sp = 0; // number of edge crossings recorded by add_to_rowspan(); zeroed before the polygons are rasterised
 
 
-
-
-
-
-
-
-#define DRAW_LINE_CLAMPED(type, value) \
-{ \
-	type **rows = (type**)frame->get_rows(); \
- \
-	if(draw_y2 != draw_y1) \
+#define INIT_ROWSPANS \
+	for (int i = 0; i<OVERSAMPLE*576; i++)  \
 	{ \
-		float slope = ((float)draw_x2 - draw_x1) / ((float)draw_y2 - draw_y1); \
-		int w = frame->get_w() - 1; \
-		int h = frame->get_h(); \
- \
-		for(float y = draw_y1; y < draw_y2; y++) \
-		{ \
-			if(y >= 0 && y < h) \
-			{ \
-				int x = (int)((y - draw_y1) * slope + draw_x1); \
-				int y_i = (int)y; \
-				int x_i = CLIP(x, 0, w); \
- \
-				if(rows[y_i][x_i] == value) \
-					rows[y_i][x_i] = 0; \
-				else \
-					rows[y_i][x_i] = value; \
-			} \
-		} \
-	} \
+		row_spans[i][0]=2; \
+		row_spans[i][1]=2000;	\
+	};
+
+// Record an edge crossing at column x on oversampled scanline y.  Crossings that
+// do not fit in the static row_spans table are dropped instead of written past it.
+void add_to_rowspan(int x, int y) {
+	if (y < 0 || y >= (int)(sizeof(row_spans) / sizeof(row_spans[0]))) return;
+	short *span = row_spans[y];	// span[0] = next free slot, span[1] = capacity
+	if (span[0] < 2 || span[0] >= span[1]) return;	// unprepared or full row
+	span[span[0]++] = x;
+	total_sp ++;
 }
 
 
@@ -93,11 +101,11 @@
 	unsigned char k)
 {
 //printf("MaskUnit::draw_line_clamped 1 %d %d %d %d\n", x1, y1, x2, y2);
+	if (y1 == y2) return; 
 	int draw_x1;
 	int draw_y1;
 	int draw_x2;
 	int draw_y2;
-	unsigned char value;
 
 	if(y2 < y1)
 	{
@@ -114,16 +122,21 @@
 		draw_y2 = y2;
 	}
 
-	switch(frame->get_color_model())
-	{
-		case BC_A8:
-			DRAW_LINE_CLAMPED(unsigned char, k);
-			break;
-		
-		case BC_A16:
-			DRAW_LINE_CLAMPED(uint16_t, k);
-			break;
-	}
+	float slope = ((float)draw_x2 - draw_x1) / ((float)draw_y2 - draw_y1); // x advance per scanline; y1 == y2 was rejected above, so no division by zero
+	int w = frame->get_w() - 1; 
+	int h = frame->get_h(); 
+	for(float y = draw_y1; y < draw_y2; y++) 
+	{ 
+		if (y >= h) 
+			return; // since y only gets larger, there is no point in continuing
+		else if(y >= 0) 
+		{ 
+			int x = (int)((y - draw_y1) * slope + draw_x1); 
+			int y_i = (int)y; 
+			int x_i = CLIP(x, 0, w); // clamp the crossing into the frame horizontally
+			add_to_rowspan(x_i, y_i); // record the crossing instead of drawing into the frame
+		} 
+	} 
 }
 
 void MaskUnit::blur_strip(float *val_p, 
@@ -407,14 +420,22 @@
 
 
 // Draw oversampled region of polygons on temp
+		int old_x, old_y;
+		int start_x, start_y;
+		old_x = -30000; // sentinel
+		total_sp = 0;
+struct timeval start_time;
+gettimeofday(&start_time, 0);
+
 		for(int k = 0; k < engine->point_sets.total; k++)
 		{
-			int old_x, old_y;
+			
 			unsigned char max = k + 1;
 			ArrayList<MaskPoint*> *points = engine->point_sets.values[k];
 
 			if(points->total < 3) continue;
 //printf("MaskUnit::process_package 2 %d %d\n", k, points->total);
+			INIT_ROWSPANS; old_x = -30000; // re-arm the sentinel: each mask is its own outline and must not be joined to the previous mask's last point
 			for(int i = 0; i < points->total; i++)
 			{
 				MaskPoint *point1 = points->values[i];
@@ -433,136 +454,220 @@
 				float x3 = point2->x;
 				float y3 = point2->y;
 
-				for(int j = 0; j <= segments; j++)
-				{
-					float t = (float)j / segments;
-					float tpow2 = t * t;
-					float tpow3 = t * t * t;
-					float invt = 1 - t;
-					float invtpow2 = invt * invt;
-					float invtpow3 = invt * invt * invt;
-
-					x = (        invtpow3 * x0
-						+ 3 * t     * invtpow2 * x1
-						+ 3 * tpow2 * invt     * x2 
-						+     tpow3            * x3);
-					y = (        invtpow3 * y0 
-						+ 3 * t     * invtpow2 * y1
-						+ 3 * tpow2 * invt     * y2 
-						+     tpow3            * y3);
+// code taken from Graphics Gems I and 
+// http://cvs.sourceforge.net/viewcvs.py/guliverkli/guliverkli/src/subtitles/Rasterizer.cpp?rev=1.3
+
+
+				float cx3, cx2, cx1, cx0, cy3, cy2, cy1, cy0;
+
+
+				// [-1 +3 -3 +1]
+				// [+3 -6 +3  0]
+				// [-3 +3  0  0]
+				// [+1  0  0  0]
+
+		 		cx3 = -  x0 + 3*x1 - 3*x2 + x3;
+				cx2 =  3*x0 - 6*x1 + 3*x2;
+				cx1 = -3*x0 + 3*x1;
+				cx0 =    x0;
+
+				cy3 = -  y0 + 3*y1 - 3*y2 + y3;
+				cy2 =  3*y0 - 6*y1 + 3*y2;
+				cy1 = -3*y0 + 3*y1;
+				cy0 =    y0;
+
+				float maxaccel1 = fabs(2*cy2) + fabs(6*cy3);
+				float maxaccel2 = fabs(2*cx2) + fabs(6*cx3);
+
+				float maxaccel = maxaccel1 > maxaccel2 ? maxaccel1 : maxaccel2;
+				float h = 1.0;
+
+				if(maxaccel > 8.0) h = sqrt(8.0 / maxaccel);
+
+				// check if it is first set
+			
 
+				for(float t = 0.0; t < 1.0; t += h)
+				{
+					x = cx0 + t*(cx1 + t*(cx2 + t*cx3));
+					y = cy0 + t*(cy1 + t*(cy2 + t*cy3));
 					y -= ptr->row1;
 					x *= OVERSAMPLE;
 					y *= OVERSAMPLE;
 
-					if(j > 0)
-					{
+					if (old_x != -30000) 
 						draw_line_clamped(temp, old_x, old_y, (int)x, (int)y, max);
+					else 
+					{
+						start_x = (int) x;
+						start_y = (int) y;
 					}
-
 					old_x = (int)x;
 					old_y = (int)y;
 				}
-			}
 
+				x = x3;
+				y = y3;
+				y -= ptr->row1;
+				x *= OVERSAMPLE;
+				y *= OVERSAMPLE;
+				draw_line_clamped(temp, old_x, old_y, (int)x, (int)y, max);
+				old_x = (int)x;
+				old_y = (int)y;
+		
+			}
 //printf("MaskUnit::process_package 1\n");
+			draw_line_clamped(temp, old_x, old_y, start_x, start_y, max);
 
 
-
-
-
-#define FILL_ROWS(type) \
-for(int i = 0; i < oversampled_package_h; i++) \
-{ \
-	type *row = (type*)temp->get_rows()[i]; \
-	int value = 0x0; \
-	int total = 0; \
- \
- 	for(int j = 0; j < oversampled_package_w; j++) \
-		if(row[j] == max) total++; \
- \
- 	if(total > 1) \
-	{ \
-		if(total & 0x1) total--; \
-		for(int j = 0; j < oversampled_package_w; j++) \
-		{ \
-			if(row[j] == max && total > 0) \
-			{ \
-				if(value)  \
-					value = 0x0; \
-				else \
-					value = max; \
-				total--; \
-			} \
-			else \
-			{ \
-				if(value) row[j] = value; \
-			} \
-		} \
-	} \
-}
-
-
-// Fill in the polygon in the horizontal direction
-			switch(temp->get_color_model())
-			{
-				case BC_A8:
-					FILL_ROWS(unsigned char);
-					break;
-
-				case BC_A16:
-					FILL_ROWS(uint16_t);
-					break;
+	// Fill in the polygon in the horizontal direction
+	// my filling
+			printf("oh : %i\n", oversampled_package_h);
+			int start_y = 0;
+			int end_y = 0;
+			for(int i = 0; i < oversampled_package_h; i++) 
+			{ 
+				unsigned char *row_to = (unsigned char *)temp->get_rows()[i]; 
+				short *span = row_spans[i];
+				if (span[0] & 1) // should never happen, but... hey you never know
+					printf("Error, odd number of spans line: %i, spans: %i\n", i, span[0]);
+				
+				if (span[0] > 2) 
+				{ 
+					if (!start_y) start_y = i / OVERSAMPLE + ptr->row1; // for later use
+					end_y = i / OVERSAMPLE + ptr->row1;                 // for later use
+
+					// insertion-sort the span; this shouldn't be a bottleneck, since the number of spans per row is low (on average fewer than 10)
+					for (int l = 2; l < span[0]; l++) {
+						int j = l;
+						short temp = span[j];
+						while (j > 2 && span[j-1] > temp) {
+							span[j] = span[j-1];
+							j--;
+						}
+						span[j] = temp;
+					}
+				}
+				// ok spans are sorted... color it!
 			}
+		// Scanline sampling
+			for (int i = start_y; i < end_y; i++) 
+			{
+				int j;
+				short min_x = 30000;
+				short max_x = -30000;
+				short *span;
+				#define P (span[1])
+				#define MAXP (span[0])
+				int num_empty_spans = 0;
+				unsigned char *output_row = (unsigned char*)mask->get_rows()[i];
+				short value = (int)((float)engine->value / 100 * 0xff);
+				// ready the spans
+				for (j = 0; j < OVERSAMPLE; j++)
+				{	
+					span = row_spans[j + i * OVERSAMPLE];
+					P = 2; // starting pointers, use the second field
+					if (MAXP != 2) {
+						if (span[2] < min_x) min_x = span[2];
+						if (span[MAXP-1] > max_x) max_x = span[MAXP-1];
+					} else
+					{
+						num_empty_spans ++;	
+					}	
+				}
+				if (num_empty_spans == OVERSAMPLE)
+					continue; // no work for us
+				// for each pixel from the beginning to the end, do the coverage calculation
+				min_x = min_x / OVERSAMPLE;
+				max_x = (max_x + OVERSAMPLE - 1) / OVERSAMPLE;
+				
+	//			printf("row %i, pixel range: %i %i, spans0: %i\n", i, min_x, max_x, row_spans[i*OVERSAMPLE][0]-2);
+				// this is not a full loop, since we adjust h often
+				for (int h = min_x; h <= max_x; h++) 
+				{
+					short pixelleft = h * OVERSAMPLE;
+					short pixelright = pixelleft + OVERSAMPLE - 1;
+					unsigned short coverage = 0;
+					int num_left = 0; // number of spans that have leftmost span left of next pixel
+					short right_end = 30000;    // leftmost end of any span - right end of a full scanline
+					short right_start = 30000;   // leftmost start of any span - left end of empty scanline
+
+					for (j=0; j< OVERSAMPLE; j++) 
+					{	
+						char chg = 1;
+						span = row_spans[j + i * OVERSAMPLE];
+						while (P < MAXP && chg)
+						{
+							if (span[P] <= pixelright)  // span is not on our right
+								coverage += MIN(span[P+1], pixelright) 
+		                                                          - MAX(span[P], pixelleft) + 1;
+							if (span[P+1] <= pixelright) 
+								P += 2;
+							else 
+								chg = 0;
+						} 
+						if (P == MAXP) 
+							num_left = -OVERSAMPLE; // just take care num_left cannot equal OVERSAMPLE or zero again
+						else	
+						{ 
+							if (span[P] <= pixelright)  // if span starts before subpixel in the pixel on the right 
+							{
+								num_left ++;						
+								if (span[P+1] < right_end) right_end = span[P+1]; 
+							} else 
+							{
+								if (span[P] < right_start) right_start = span[P]; 
+							}
+						}
+					}
+					// calculate coverage
+					coverage *= value;
+					if(OVERSAMPLE == 8) coverage >>= 6; \
+					else \
+					if(OVERSAMPLE == 4) coverage >>= 2; \
+					else \
+					if(OVERSAMPLE == 2) coverage >>= 2; \
+					else coverage /= OVERSAMPLE * OVERSAMPLE; \
+					
+					if (output_row[h] < coverage) // when we have multiple masks... we don't want aliasing inside areas
+						output_row[h] = coverage;
+			
+					if (num_left == OVERSAMPLE) 
+					{
+						// all spans start more left than next pixel
+						// this means we can probably (if lucky) draw a longer horizontal line
+						right_end = (right_end / OVERSAMPLE) - 1; // last fully covered pixel
+						if (right_end > h)
+						{
+							memset(output_row + h + 1, value, right_end - h);
+							h = right_end;  
+						}
+					} else 
+					if (num_left == 0) 
+					{
+						// if we are lucky we have an empty scanline ahead of us
+						right_start = (right_start / OVERSAMPLE) - 1; // last fully empty pixel
+						if (right_start > h)
+						{
+							h = right_start;
+						}
+					}
+				}
+				
+			}					
+			
 		}
+		int64_t dif= get_difference(&start_time);
+		printf("diff: %lli\n", (long long)dif); // int64_t is not long long on every ABI; cast so the argument matches %lli
+	}
 
 
 
 
 
-
-
-
-#define DOWNSAMPLE(type, value) \
-for(int i = 0; i < ptr->row2 - ptr->row1; i++) \
-{ \
-	type *output_row = (type*)mask->get_rows()[i + ptr->row1]; \
-	unsigned char **input_rows = (unsigned char**)temp->get_rows() + i * OVERSAMPLE; \
- \
- \
-	for(int j = 0; j < mask_w; j++) \
-	{ \
-		int64_t total = 0; \
- \
-/* Accumulate pixel */ \
-		for(int k = 0; k < OVERSAMPLE; k++) \
-		{ \
-			unsigned char *input_vector = input_rows[k] + j * OVERSAMPLE; \
-			for(int l = 0; l < OVERSAMPLE; l++) \
-			{ \
-				total += (input_vector[l] ? value : 0); \
-			} \
-		} \
- \
-/* Divide pixel */ \
-		if(OVERSAMPLE == 8) \
-			total >>= 6; \
-		else \
-		if(OVERSAMPLE == 4) \
-			total >>= 2; \
-		else \
-		if(OVERSAMPLE == 2) \
-			total >>= 2; \
-		else \
-			total /= OVERSAMPLE * OVERSAMPLE; \
- \
-		output_row[j] = total; \
-	} \
-}
-
-
+//
 // Downsample polygon
-		switch(mask->get_color_model())
+/*		switch(mask->get_color_model())
 		{
 			case BC_A8:
 			{
@@ -583,7 +688,7 @@
 
 	}
 
-
+*/
 	if(ptr->part == RECALCULATE_PART)
 	{
 // The feather could span more than one package so can't do it until
@@ -631,9 +736,11 @@
 
 #define APPLY_MASK_SUBTRACT_ALPHA(type, max, components, do_yuv) \
 { \
+	int chroma_offset = (max + 1) / 2; \
+	for(int i = ptr->row1; i < ptr->row2; i++) \
+	{ \
 	type *output_row = (type*)engine->output->get_rows()[i]; \
 	type *mask_row = (type*)engine->mask->get_rows()[i]; \
-	int chroma_offset = (max + 1) / 2; \
  \
 	for(int j  = 0; j < mask_w; j++) \
 	{ \
@@ -655,41 +762,47 @@
 			} \
 		} \
 	} \
+	} \
 }
 
 #define APPLY_MASK_MULTIPLY_ALPHA(type, max, components, do_yuv) \
 { \
+	int chroma_offset = (max + 1) / 2; \
+		for(int i = ptr->row1; i < ptr->row2; i++) \
+		{ \
 	type *output_row = (type*)engine->output->get_rows()[i]; \
 	type *mask_row = (type*)engine->mask->get_rows()[i]; \
-	int chroma_offset = (max + 1) / 2; \
  \
-	for(int j  = 0; j < mask_w; j++) \
+        if (components == 4) output_row += 3; \
+	for(int j  = mask_w; j != 0;  j--) \
 	{ \
 		if(components == 4) \
 		{ \
-			output_row[j * 4 + 3] = output_row[j * 4 + 3] * mask_row[j] / max; \
+			*output_row = *output_row * *mask_row / max; \
 		} \
 		else \
 		{ \
-			output_row[j * 3] = output_row[j * 3] * mask_row[j] / max; \
+			output_row[0] = output_row[0] * mask_row[0] / max; /* scale this pixel's own first component; [3] would be the next pixel in 3-component data */ \
  \
-			output_row[j * 3 + 1] = output_row[j * 3 + 1] * mask_row[j] / max; \
-			output_row[j * 3 + 2] = output_row[j * 3 + 2] * mask_row[j] / max; \
+			output_row[1] = output_row[1] * mask_row[0] / max; \
+			output_row[2] = output_row[2] * mask_row[0] / max; \
  \
 			if(do_yuv) \
 			{ \
-				output_row[j * 3 + 1] += chroma_offset * (max - mask_row[j]) / max; \
-				output_row[j * 3 + 2] += chroma_offset * (max - mask_row[j]) / max; \
+				output_row[1] += chroma_offset * (max - mask_row[0]) / max; \
+				output_row[2] += chroma_offset * (max - mask_row[0]) / max; \
 			} \
 		} \
+		output_row += components; \
+		mask_row += 1;		 \
+	} \
 	} \
 }
 
 
 
-
 //printf("MaskUnit::process_package 1 %d\n", engine->mode);
-		for(int i = ptr->row1; i < ptr->row2; i++)
+	//	for(int i = ptr->row1; i < ptr->row2; i++)
 		{
 			switch(engine->mode)
 			{
@@ -746,6 +859,7 @@
 					break;
 			}
 		}
+//printf("diff2: %lli\n", get_difference(&start_time));
 	}
 //printf("MaskUnit::process_package 4 %d\n", get_package_number());
 }
@@ -755,7 +869,7 @@
 
 
 MaskEngine::MaskEngine(int cpus)
- : LoadServer(cpus, cpus * OVERSAMPLE * 2)
+ : LoadServer(cpus, cpus * 2)
 // : LoadServer(1, 2)
 {
 	mask = 0;
@@ -911,7 +1025,7 @@
 		}
 	}
 
-//printf("MaskEngine::do_mask 4 %d\n", recalculate);
+printf("MaskEngine::do_mask 4 %d\n", recalculate);
 
 
 	this->output = output;
