chenchengfly的个人博客分享 http://blog.sciencenet.cn/u/chenchengfly

博文

perl根据列合并多个文件(不超过5个)

已有 3913 次阅读 2017-12-6 15:24 |系统分类:科研笔记

#!/usr/bin/perl -w
use strict;
#=============================================================#
# Function: Combine the per-sample SNP mutation-type count
#           files (<sample>_snp_mut_num.txt) into one table,
#           one count column per sample. Any number of samples
#           is accepted (the original version stopped at 5).
# Author: Chen Cheng
# Email: hengbenxianfeng@163.com
# Time: 2017-12-6
#=============================================================#

=pod

Usage:
    perl <this_script> <sample_list>

sample_list is a text file whose FIRST line holds the sample names,
tab delimited (e.g. "A<TAB>B"). For every sample name S the file
S_snp_mut_num.txt is read from the current directory. The merged table
is written to snp_type_num.xls (tab delimited) in the current directory.

Files are merged row by row (by position): the first input file supplies
the "Type" label and its count, every further file contributes only its
second column. All input files must therefore have the same number of
rows in the same order.

Example:
Before combine
A_snp_mut_num.txt:
Type    Number
A->C    91865
A->G    410769
A->T    101360
C->A    98085
C->G    69001
C->T    474221
G->A    472947
G->C    69380
G->T    97954
T->A    101894
T->C    412407
T->G    90893
Total    2490776
B_snp_mut_num.txt:
Type    Number
A->C    90612
A->G    405209
A->T    99977
C->A    96502
C->G    68034
C->T    469047
G->A    468642
G->C    68589
G->T    96689
T->A    100286
T->C    406962
T->G    89598
Total    2460147
After combine (snp_type_num.xls)
Type    A    B
A->C    91865    90612
A->G    410769    405209
A->T    101360    99977
C->A    98085    96502
C->G    69001    68034
C->T    474221    469047
G->A    472947    468642
G->C    69380    68589
G->T    97954    96689
T->A    101894    100286
T->C    412407    406962
T->G    90893    89598
Total    2490776    2460147
Note: sample_list contain A and B, which is tab delimited.
=cut

die "perl $0 <sample_list>\n" unless @ARGV == 1;

my $suffix   = "_snp_mut_num.txt";
my $out_file = "snp_type_num.xls";

# --- Read the sample names: only the first line of the list is used. ---
open my $list_fh, '<', $ARGV[0]
    or die "Cannot open sample list '$ARGV[0]': $!\n";
my $name_line = <$list_fh>;
close $list_fh;
die "Sample list '$ARGV[0]' is empty\n" unless defined $name_line;
$name_line =~ s/[\r\n]+\z//;    # strip LF or CRLF line endings
my @samples = split /\t/, $name_line;
die "No sample names found on the first line of '$ARGV[0]'\n" unless @samples;

# --- Open every input file up front and discard its header line. ---
# Failing here, before the output is created, avoids leaving a
# half-written result behind when a sample file is missing.
my @inputs;
for my $sample (@samples) {
    my $path = $sample . $suffix;
    open my $fh, '<', $path or die "Cannot open '$path': $!\n";
    my $header = <$fh>;
    die "'$path' is empty (no header line)\n" unless defined $header;
    push @inputs, { path => $path, fh => $fh };
}

open my $out_fh, '>', $out_file or die "Cannot write '$out_file': $!\n";
print {$out_fh} join("\t", "Type", @samples), "\n";

# --- Merge the files row by row. ---
my $row = 1;    # line number within the input files (the header was line 1)
while (1) {
    $row++;

    # Read one line from every file. All files must run out together,
    # otherwise the rows would no longer line up.
    my @lines   = map { scalar readline($_->{fh}) } @inputs;
    my $n_lines = grep { defined $_ } @lines;
    last if $n_lines == 0;
    if ($n_lines != @inputs) {
        my @ended = map  { $inputs[$_]{path} }
                    grep { !defined $lines[$_] } 0 .. $#lines;
        die "Row count mismatch at line $row: these files ended early: @ended\n";
    }

    s/[\r\n]+\z// for @lines;

    # Ignore rows that are blank in every file (e.g. trailing empty lines).
    next unless grep { length $_ } @lines;

    # The first file contributes its whole line ("Type<TAB>Number") ...
    my ($type) = split /\t/, $lines[0];
    $type = '' unless defined $type;
    my @fields = ($lines[0]);

    # ... every other file contributes only its count column.
    for my $i (1 .. $#lines) {
        my ($this_type, $count) = split /\t/, $lines[$i];
        $this_type = '' unless defined $this_type;
        $count     = '' unless defined $count;
        warn "Line $row: type '$this_type' in '$inputs[$i]{path}' differs "
           . "from '$type' in '$inputs[0]{path}'\n"
            if $this_type ne $type;
        push @fields, $count;
    }

    print {$out_fh} join("\t", @fields), "\n";
}

close $_->{fh} for @inputs;
# Buffered write errors (e.g. disk full) only surface at close time.
close $out_fh or die "Error while writing '$out_file': $!\n";








https://blog.sciencenet.cn/blog-3262030-1088500.html

上一篇:get_LOD_peak.pl(MapQTL6.0)
下一篇:Zotero使用说明
收藏 IP: 120.211.144.*| 热度|

0

该博文允许注册用户评论 请点击登录 评论 (0 个评论)

数据加载中...

Archiver|手机版|科学网 ( 京ICP备07017567号-12 )

GMT+8, 2024-3-29 12:57

Powered by ScienceNet.cn

Copyright © 2007- 中国科学报社

返回顶部