fork download
  # Remove duplicate FASTA records, keeping the first record seen for each
  # distinct sequence.
  #
  # Input : records separated by one or more blank lines; the first line of a
  #         record is its header, every following line is part of the sequence.
  # Output: each retained record printed unchanged, followed by a blank line.
  #
  # Two records are duplicates when their sequence lines, joined together,
  # compare equal after removing whitespace and ignoring letter case. How the
  # sequence is wrapped across lines therefore does not matter, and neither do
  # trailing blanks, tabs or stray carriage returns on a sequence line.
  # Headers are never compared. Records that consist of a header only all share
  # the empty key, so only the first of them is kept.
  BEGIN {
      RS  = ""        # paragraph mode: blank lines separate records
      FS  = "\n"      # one field per line: $1 is the header, $2..$NF the sequence
      ORS = "\n\n"    # put a blank line back after every printed record
  }

  {
      # Build the comparison key from the sequence lines only (skip the header).
      key = ""
      for (i = 2; i <= NF; ++i)
          key = key $i

      # Normalise the key, not the record: whitespace inside or at the end of a
      # sequence line must not make identical sequences look different.
      gsub(/[ \t\r]/, "", key)
      key = toupper(key)
  }

  # First occurrence of this sequence: remember it and emit the record as read.
  !(key in seen) {
      seen[key]       # a bare reference creates the array element
      print
  }
Success #stdin #stdout 0s 4560KB
stdin
>sequence1_CP [seq  virus]
MQCKSGTNNVFTAIKYTTNNNIIYKSENNDNIIFTKNIFNVVTTKDAFIFSKNRGIMNL
DITKKFDYHEHRPKLCVFKIINTQYVNSPEKMIDAWPTMDIVALITE

>sequence2 [virus]
MQCKSGTNNVFTAIKYTTNNNIIYKSENNDNIIFTKNIFNVVTTKDAFIFSKNRGIMNL
DITKKFDYHEHRPKLCVFKIINTQYVNSPEKMIDAWPTMDIVALITE

>sequence3
MQCKSGTNNVFTAIKYTTNNNIIYKSENNDNIIFTKNIFNVVTTKDAFIFSKNRGIMNL
DITKKFDYHEHRPKLCVFKIINTQYVNSPEKMIDAWPTMDIVALITE

>sequence4_CP hypothetical protein [another virus]
MLRHSCVMPQQKLKKRFFFLRRLRKILRYFFTCNFLNLFFINREYNIENITLSYLKKERIPVWKTSDMSN
IVRKWWMFHRKTQLEDNIEIKKDIQLYHFFYNGLFIKTNYPYVYHIDKKKKYDFNDMKVIYLPAIHMHSK

>sequence5 hypothetical protein [another virus]
MLRHSCVMPQQKLKKRFFFLRRLRKILRYFFTCNFLNLFFINREYNIENITLSYLKKERIPVWKTSDMSN
IVRKWWMFHRKTQLEDNIEIKKDIQLYHFFYNGLFIKTNYPYVYHIDKKKKYDFNDMKVIYLPAIHMHSK

>sequence6 |hypothetical protein[virus]
MQCKSGTNNVFTAIKYTTNNNIIYKSENNDNIIFTKNIFNVVTTKDAFIFSKNRGIMNLD
ITKKFDYHEHRPKLCVFKIINTQYVNSPEKMIDAWPTMDIVALITE

>sequence7 |hypothetical protein[virus]
MQCKSGTNNVFTAIKYTTNNNIIYKSENNDNIIFTKNIFNVVTTKDAFIFSKNRGIMNLD
ITKKFDYHEHRPKLCVFKIINTQYVNSPEKMIDAWPTMDIVALITE
stdout
>sequence1_CP [seq  virus]
MQCKSGTNNVFTAIKYTTNNNIIYKSENNDNIIFTKNIFNVVTTKDAFIFSKNRGIMNL
DITKKFDYHEHRPKLCVFKIINTQYVNSPEKMIDAWPTMDIVALITE

>sequence4_CP hypothetical protein [another virus]
MLRHSCVMPQQKLKKRFFFLRRLRKILRYFFTCNFLNLFFINREYNIENITLSYLKKERIPVWKTSDMSN
IVRKWWMFHRKTQLEDNIEIKKDIQLYHFFYNGLFIKTNYPYVYHIDKKKKYDFNDMKVIYLPAIHMHSK