U-SQL 不直接支持动态 U-SQL,但可以使用“用脚本生成脚本”的技术:先运行一个 U-SQL 脚本,让它把另一个 U-SQL 脚本作为输出文件生成出来。然后,您可以手动运行这个生成的脚本,或使用 PowerShell、Azure 数据工厂等工具来运行它。
我根据您的测试数据创建了一个简单的示例,其中部分内容参考了此处(原文链接)的示例。
// Load the initial file set: every file under /input whose name starts with
// "File_201703". {filename} is a file-set pattern; the part of the path it
// matches is surfaced as the "filename" column of each extracted row.
// The column spellings (longtitude, lattitude) are kept as they are because
// the later statements and the generated script refer to them by these names.
@input =
    EXTRACT longtitude float,
            lattitude  float,
            date       string,
            hour       int,
            value1     int,
            filename   string
    FROM "/input/File_201703{filename}"
    USING Extractors.Csv();
// Carry every extracted column forward and add whole-number versions of the
// coordinates. The (int) cast drops the fractional part, so all rows that fall
// in the same (int_long, int_lat) cell can be grouped together later.
@working =
    SELECT longtitude,
           lattitude,
           date,
           hour,
           value1,
           filename,
           (int) longtitude AS int_long,
           (int) lattitude AS int_lat
    FROM @input;
// Produce one row per distinct (int_long, int_lat) cell together with the
// name of the file that cell's rows will be written to.
@filenames =
    SELECT String.Format("File {0}_{1}.csv", int_long, int_lat) AS outputFilename,
           int_long,
           int_lat
    FROM
    (
        // De-duplicate the cells first so each output file is named once.
        SELECT DISTINCT int_long,
                        int_lat
        FROM @working
    ) AS cells;
// Construct the dynamic U-SQL, one generated statement per row.
// xsort fixes the order the statements must appear in the generated script:
//   10 = the EXTRACT that rebuilds @input
//   20 = the SELECT that rebuilds @working
//   30 = one OUTPUT statement per (int_long, int_lat) cell from @filenames
// The generated EXTRACT uses the same rooted path ("/input/...") as the
// EXTRACT at the top of this script, so both scripts read the same files.
@output =
    SELECT x.xsort, "@input = EXTRACT longtitude float, lattitude float, date string, hour int, value1 int, filename string FROM \"/input/File_201703{filename}\" USING Extractors.Csv();" AS usql
    FROM ( VALUES ( 10 ) ) AS x(xsort)
    UNION ALL
    SELECT x.xsort, "@working = SELECT *, (int) longtitude AS int_long, (int) lattitude AS int_lat FROM @input;" AS usql
    FROM ( VALUES ( 20 ) ) AS x(xsort)
    UNION ALL
    SELECT 30 AS xsort, String.Format("OUTPUT (SELECT * FROM @working WHERE int_long == {0} AND int_lat == {1}) TO \"/output/{2}\" USING Outputters.Csv();", int_long, int_lat, outputFilename) AS usql
    FROM @filenames;
// Keep only the usql column and put the statements in execution order.
// U-SQL accepts ORDER BY inside a SELECT only when it is paired with FETCH,
// so the FETCH here is a row cap, not a filter. It has to be at least the
// number of generated lines (2 + one OUTPUT per cell); a small cap silently
// drops OUTPUT statements from the generated script. 100000 covers every
// whole-degree cell when the inputs are valid coordinates (361 x 181 + 2).
// NOTE(review): the line order of the written file relies on this ordering
// carrying through to OUTPUT below - confirm on the target runtime.
@output =
    SELECT usql
    FROM @output
    ORDER BY xsort
    FETCH 100000;

// Write the generated script. There is a single column, so the delimiter is
// never emitted; quoting is disabled so each generated statement is written
// as plain text rather than wrapped in double quotes.
OUTPUT @output
TO "/output/dynamic.usql"
USING Outputters.Text(delimiter : ' ', quoting : false);