仅针对您给出的示例数据,可以尝试以下 awk 方案。
# Turn a sectioned listing in Input_file into CSV rows of name,age,course.
#
# A line containing <t>"NAME"</t>, <t>"AGE"</t> or <t>"COURSE"</t> opens the
# matching section; every following line that contains ">" contributes the
# text between that ">" and the next "<" as the next value of the section.
# The header line itself is captured as value 1, which yields the CSV header.
# Sections may appear in any order and may differ in length.
awk -v OFS="," '
# Ignore blank lines and separator lines that consist of a single "-".
!NF || /^-$/ { next }

# Exactly one section is active at a time. Selecting a new one restarts the
# per-section counter so that values are stored from index 1 again.
/<t>"COURSE"<\/t>/ { section = "course"; count = 0 }
/<t>"AGE"<\/t>/    { section = "age";    count = 0 }
/<t>"NAME"<\/t>/   { section = "name";   count = 0 }

# Store the text that follows the first ">" up to (not including) the next "<".
section != "" && match($0, />[^<]*/) {
  value = substr($0, RSTART + 1, RLENGTH - 1)
  ++count
  if (section == "name")      name[count]   = value
  else if (section == "age")  age[count]    = value
  else                        course[count] = value
  # Remember the longest section so END does not depend on the last one seen.
  if (count > rows) rows = count
}

END {
  # Emit one row per index; rows without a name are skipped.
  for (k = 1; k <= rows; k++) {
    if (name[k] != "") {
      print name[k], age[k], course[k]
    }
  }
}
' Input_file
解释:下面为上述代码逐行添加了详细注释。
awk -v OFS="," '                          ##Start the awk program; output fields are joined with a comma.
!NF || /^-$/{ next }                      ##Skip blank lines and lines that consist of a single - only.
/<t>"COURSE"<\/t>/{                       ##A line containing <t>"COURSE"</t> opens the course section.
  section="course"                        ##Only one section is active at a time, so any section order works.
  count=0                                 ##Restart the per-section index.
}
/<t>"AGE"<\/t>/{                          ##A line containing <t>"AGE"</t> opens the age section.
  section="age"                           ##Make age the active section.
  count=0                                 ##Restart the per-section index.
}
/<t>"NAME"<\/t>/{                         ##A line containing <t>"NAME"</t> opens the name section.
  section="name"                          ##Make name the active section.
  count=0                                 ##Restart the per-section index.
}
section!="" && match($0,/>[^<]*/){        ##Inside a section, find the first > and the text up to the next <.
  value=substr($0,RSTART+1,RLENGTH-1)     ##Drop the leading > from the match to get the value itself.
  ++count                                 ##Advance the index of the active section.
  if(section=="name"){                    ##Store the value in the array that belongs to the active section.
    name[count]=value
  }
  else if(section=="age"){
    age[count]=value
  }
  else{
    course[count]=value
  }
  if(count>rows){                         ##Track the longest section so END does not rely on the last one seen.
    rows=count
  }
}
END{                                      ##END block, run once after all input has been read.
  for(k=1;k<=rows;k++){                   ##Walk every stored index.
    if(name[k]!=""){                      ##Rows without a name are skipped.
      print name[k],age[k],course[k]      ##Print name, age and course of this index as one CSV row.
    }
  }
}
' Input_file ##Mentioning Input_file name here.
编辑:根据提问者的评论,如果需要把所有值输出在同一行,请尝试以下操作:
# Flatten a sectioned listing in Input_file into ONE comma-separated line:
# all names first, then all ages, then all courses.
#
# A line containing <t>"NAME"</t>, <t>"AGE"</t> or <t>"COURSE"</t> opens the
# matching section; every following line that contains ">" contributes the
# text between that ">" and the next "<" as the next value of the section.
# The header line itself is captured as value 1 of its section.
# Sections may appear in any order.
awk -v OFS="," '
# Ignore blank lines and separator lines that consist of a single "-".
!NF || /^-$/ { next }

# Exactly one section is active at a time. Selecting a new one restarts the
# per-section counter so that values are stored from index 1 again.
/<t>"COURSE"<\/t>/ { section = "course"; count = 0 }
/<t>"AGE"<\/t>/    { section = "age";    count = 0 }
/<t>"NAME"<\/t>/   { section = "name";   count = 0 }

# Store the text that follows the first ">" up to (not including) the next "<".
section != "" && match($0, />[^<]*/) {
  value = substr($0, RSTART + 1, RLENGTH - 1)
  ++count
  if (section == "name")      name[count]   = value
  else if (section == "age")  age[count]    = value
  else                        course[count] = value
  # Remember the longest section so END does not depend on the last one seen.
  if (count > rows) rows = count
}

END {
  # Join each column separately; indexes without a name are skipped.
  for (k = 1; k <= rows; k++) {
    if (name[k] == "") continue
    nameVal   = (nameVal   ? nameVal   OFS : "") name[k]
    ageVal    = (ageVal    ? ageVal    OFS : "") age[k]
    courseVal = (courseVal ? courseVal OFS : "") course[k]
  }
  # Always prints exactly one line, even when nothing was collected.
  print nameVal, ageVal, courseVal
}
' Input_file