# This is a command file for the GenomeHistory.pl program
# "#" lines are comments
# All commands are case-insensitive (but filenames may not be)
# Commands are given both as "human readable" and short (one or two letters)
#
#Protein sequences =: A FASTA format file containing all protein
#sequences used in the analysis
#pi = orf_trans.fasta
Protein sequences = test_trans.fasta
#
#ORF sequences =: A FASTA format file containing all DNA open reading
#frames to be used in the analysis (introns should be excluded)
#ni = orf_coding.fasta
ORF sequences = test_coding.fasta
#
#Gene Name Field =: The field number in the FASTA input files that contains
#the unique gene name for each sequence. If this parameter is not specified
#the program will identify each gene by its full FASTA name string.
#This gives ugly output, but it will work for almost any genome
#gn = 2
Gene Name Field = 2
#
#Gene file =: (Optional) A file listing all genes to be analyzed with BLAST
#(if this file is omitted the program will run BLAST on all protein sequences
#in the protein sequences file)
# This file is of the format :
# degree
# NOTE(review): the format description above looks truncated (only "degree"
# survives) - confirm the expected column layout against the program docs.
#
#gf = wagner_yeast.txt
#Gene file = yeast.txt
#
#Output file =: File where Ks and Ka/Ks values will be written
#o = genome_ks.txt
Output file = test_ks.txt
#
#Error file =: File where errors/warnings in alignments and analysis will be
#written. This file is useful because it indicates which genes were not
#analyzed successfully
#e = genome_ks.err
Error file = test_ks.err
#
#Genetic code =: Gives the correct genetic code for the current genome
#The universal code is the default.
#U=Universal
#VM=Vert. Mito.
#YM=Yeast Mito.
#MM=Mold Mito.
#IM=Invert. Mito.
#CN=Ciliate Nuc.
#EM=Echino. Mito.
#gc=U/VM/YM/MM/IM/CN/EM
Genetic code = U
#
#Ordered gene list = (YES/NO): If a genome does not provide identifiers that
#allow one-to-one matching between protein sequences and their corresponding
#nucleotide sequences, but the ORFs and their protein sequences are in a
#one-to-one order, this option will use order to find genes instead of
#identifiers
#og = YES
Ordered gene list = YES
#
#BLAST matrix loc =: Gives the location of the BLAST Blosum matrix (assuming
#it's not in the default location)
#bl = /usr/local/wublast/BLOSUM62
#
#Only Top BLAST match = (YES/NO): If this option is specified, the program will
#return only the top BLAST hit for each sequence. Otherwise, it will return
#all hits above the threshold set below
#tb = NO
#Only Top BLAST match = NO
#
#BLAST Threshold =: When Only Top BLAST match is off, this option specifies the
#e-value cutoff: hits with an e-value at or below this value are accepted for
#further analysis. This value can be fairly permissive as poor hits are also
#weeded out with the pair-wise percent similarity calculation.
#Default is e <= 0.01
#bt = 0.10
BLAST threshold = 0.00010
#
#Base frequencies =
#Determines how the ka/ks estimation routine will calculate base frequencies
#Equal uses 0.25 for each base, Observed uses the average over both sequences of the
#frequencies, and CODON uses the average over both sequences of the frequencies
#at each codon position
#bf=observed
Base frequencies = CODON
#
#Alignment method:=
#By default, global (Needleman/Wunsch) alignments are used. "Local" gives Smith/Waterman alignments
#Alignment method = GLOBAL
#am = GLOBAL
#
#Gap exclusion:=
#By default, gaps in the alignment are excluded in the computation of %ID and Ka/Ks.
#Exclude gaps = YES
#eg=YES
#
#Minimum ORF Translation length =: Defines the translated length of an ORF,
#below which that ORF will be ignored by the program. Default is 40 amino
#acids
#mo = 40
Minimum ORF Translation length = 40
#
#Minimum Number Aligned Residues =: Defines the minimum number of residues in
#a sequence alignment that must be aligned against non-gap characters to
#allow the analysis of that pair. Default is 40 amino acids
#ma = 40
Minimum Number Aligned Residues = 40
#
#Percent identity threshold =: Sets the minimum similarity limit that will
#allow sequences to be aligned and analyzed. Default is 0.50 (50% identical
#residues). Similarity is calculated with a straight percent difference on
#non-gap residues
#pt = 0.20
Percent identity threshold = 0.20
#
#output length information =
#Output statistics on ORF lengths (in amino acid residues) and non-gapped
#alignment lengths. Useful for filtering for partial duplications
#ol = off
output length information = on
#
#Warnings = (ON/OFF): By default, any BLAST hits that do not meet the length, similarity and
#alignment criteria are discarded without being logged (Warnings = OFF). Use Warnings=ON to log
#these discarded pairs in the error file
#w= OFF
#Warnings = OFF
#
#Ignore List Gene Hits=(ON/OFF): By default, if a list of genes to search is provided, hits between
#genes on that list will be returned (Ignore List Gene Hits=OFF). If you only want to search for hits
#between genes on the list and other genes (for instance to compare two genomes without returning
#paralogs in either), use Ignore List Gene Hits=YES
# NOTE(review): this description mixes ON/OFF with YES/NO - confirm which
# spellings the program accepts before enabling this option.
#il=NO
#
#Checkpoint = (ON/OFF): GenomeHistory will automatically create a .checkpoint file as it runs,
#which gives the current gene being analyzed. If this option is on, GenomeHistory will restart
#from the point given in the current checkpoint file. If no checkpoint file exists, it will start
#from the beginning. Checkpoint file is named .GenomeHistory.chkpnt.(argfile) or .GenomeHistory.chkpnt.cmdline
#cp = on
Checkpoint = on