下面是批量10M使用CROSS JOIN的方法。在我的台式机上,大约 6 分钟就加载了 20 亿行。
--create and load 2B row table using cross join CTE source
CREATE TABLE dbo.TableHere(
ID int NOT NULL
);
DECLARE
@TargetRowCount int = 2000000000
, @RowsInserted int = 0;
WHILE @RowsInserted < @TargetRowCount
BEGIN
WITH
t10 AS (SELECT n FROM (VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) t(n))
,t1k AS (SELECT 0 AS n FROM t10 AS a CROSS JOIN t10 AS b CROSS JOIN t10 AS c)
,t10m AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS num FROM t1k AS a
CROSS JOIN t1k AS b CROSS JOIN t10 AS c)
INSERT INTO dbo.TableHere WITH(TABLOCKX)
SELECT num + @RowsInserted
FROM t10m;
SET @RowsInserted += @@ROWCOUNT;
DECLARE @CurrentTimestampString varchar(1000) = FORMAT(SYSDATETIME(),'yyyy-MM-dd HH:mm:ss');
RAISERROR('%s: %d of %d rows inserted', 0, 0, @CurrentTimestampString, @RowsInserted, @TargetRowCount) WITH NOWAIT;
END;
GO
考虑创建一个具有增量值的永久统计表。这不仅会提高测试数据生成的性能,而且可以重用物化计数表来促进各种其他常见任务,例如生成增量值范围、日期时间序列等。
通过避免每次迭代都重复交叉联接,利用计数表将我的机器的性能提高了 30%(4.5 分钟)。EDIT:处理器升级后缩短至 2.5 分钟(英特尔 i9-12900K 处理器)。
SET NOCOUNT ON;
--create and load 10M row tally table
DROP TABLE IF EXISTS dbo.Tally;
CREATE TABLE dbo.Tally(
Num int NOT NULL CONSTRAINT PK_Tally PRIMARY KEY
);
WITH
t10 AS (SELECT n FROM (VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) t(n))
,t1k AS (SELECT 0 AS n FROM t10 AS a CROSS JOIN t10 AS b CROSS JOIN t10 AS c)
,t10m AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS num FROM t1k AS a
CROSS JOIN t1k AS b CROSS JOIN t10 AS c)
INSERT INTO dbo.Tally WITH(TABLOCKX) (Num)
SELECT num
FROM t10m;
ALTER INDEX PK_Tally ON dbo.Tally REBUILD WITH (FILLFACTOR=100);
GO
--create and load 2B row table using tally table source
CREATE TABLE dbo.TableHere(
ID int NOT NULL
);
DECLARE
@TargetRowCount int = 2000000000
, @RowsInserted int = 0;
WHILE @RowsInserted < @TargetRowCount
BEGIN
INSERT INTO dbo.TableHere WITH(TABLOCKX) (ID)
SELECT Num + @RowsInserted
FROM dbo.Tally;
SET @RowsInserted += @@ROWCOUNT;
DECLARE @CurrentTimestampString varchar(1000) = FORMAT(SYSDATETIME(),'yyyy-MM-dd HH:mm:ss');
RAISERROR('%s: %d of %d rows inserted', 0, 0, @CurrentTimestampString, @RowsInserted, @TargetRowCount) WITH NOWAIT;
END;
GO