# This is a command file for the GenomeHistory.pl program
# "#" lines are comments
# All commands are case-insensitive (but filenames may not be)
# Commands are given both as "human readable" and short (one or two letters)

#Protein sequences =: A FASTA format file containing all protein
#sequences used in the analysis
#pi = orf_trans.fasta
Protein sequences = test_trans.fasta

#ORF sequences =: A FASTA format file containing all DNA open reading 
#frames to be used in the analysis (introns should be excluded)
#ni = orf_coding.fasta
ORF sequences = test_coding.fasta

#Gene Name Field =: The field number in the FASTA input files that contains
#the unique gene name for each sequence.  If this parameter is not specified
#the program will identify each gene by its full FASTA name string.
#This gives ugly output, but it will work for most any genome
#gn = 2
Gene Name Field = 2

#Gene file =:(Optional) A file listing all genes to be analyzed with BLAST
#(if this file is omitted the program will run BLAST on all protein sequences
#in the protein sequences file)
# This file is of the format :
# <GENENAME> degree # <further text ignored>
#gf = wagner_yeast.txt
#Gene file = yeast.txt

#Output file =: File where Ks and Ka/Ks values will be written
#o = genome_ks.txt
Output file = test_ks.txt

#Error file =: File where errors/warnings in alignments and analysis will be 
#written.  This file is useful because it indicates which genes were not
#analyzed successfully
#e = genome_ks.err
Error file = test_ks.err


#Genetic code =: Gives the correct genetic code for the current genome
#The universal code is the default.
#U=Universal
#VM=Vert. Mito.
#YM=Yeast Mito.
#MM=Mold Mito.
#IM=Invert. Mito
#CN=Ciliate Nuc.
#EM=Echino. Mito.
#gc=U/VM/YM/MM/IM/CN/EM
Genetic code = U


#Ordered gene list =: If a genome does not provide identifiers that allow
#one-to-one matching between protein sequences and their corresponding
#nucleotide sequences, but the ORFs and their protein sequences are in a
#one-to-one order, this option will use order to find genes instead of
#identifiers
#og = YES
Ordered gene list = YES


#BLAST matrix loc =: Gives the location of the BLAST Blosum matrix (assuming 
#it's not in the default location)
#bl = /usr/local/wublast/BLOSUM62

#Only Top BLAST match = (YES/NO): If this option is specified, the program will
#return only the top BLAST hit for each sequence.  Otherwise, it will return
#all hits above the threshold set below
#tb = NO
#Only Top BLAST match = NO

#BLAST Threshold =:  When Only Top BLAST match is off, this option specifies the
#maximum e-value which will be accepted for further analysis.  This value
#can be fairly permissive as poor hits are also weeded out with the pair-wise
#percent similarity calculation.  Default is e <= 0.01
#bt = 0.10
BLAST threshold = 0.00010

#Base frequencies = <CODON/OBSERVED/EQUAL>
#Determines how the ka/ks estimation routine will calculate base frequencies
#Equal uses 0.25 for each base, Observed uses the average over both sequences of the
#frequencies, and CODON uses the average over both sequences of the frequencies 
#at each codon position
#bf=observed
Base frequencies = CODON

#Alignment method:= <GLOBAL/LOCAL>
#By default, global (Needleman/Wunsch) alignments are used. "Local" gives Smith/Waterman alignments
#Alignment method = GLOBAL
#am = GLOBAL

#Gap exclusion:= <YES/NO>
#By default, gaps in the alignment are excluded in the computation of %ID and Ka/Ks.
#Exclude gaps = YES
#eg=YES

#Minimum ORF Translation length =: Defines the translated length of an ORF, 
#below which that ORF will be ignored by the program.  Default is 40 amino 
#acids 
#mo = 40
Minimum ORF Translation length = 40

#Minimum Number Aligned Residues =: Defines the minimum number of residues in
#a sequence alignment that must be aligned against non-gap characters to
#allow the analysis of that pair.  Default is 40 amino acids 
#ma = 40
Minimum Number Aligned Residues = 40

#Percent identity threshold =: Sets the minimum similarity limit that will
#allow sequences to be aligned and analyzed.  Default is 0.50 (50% identical
#residues).  Similarity is calculated with a straight percent difference on
#non-gap residues
#pt = 0.20
Percent identity threshold = 0.20

#output length information = <ON/OFF>
#Output statistics on ORF lengths (in amino acid residues) and non-gapped 
#alignment lengths.  Useful for filtering for partial duplications
#ol = off
output length information = on

#Warnings = (ON/OFF):  By default, any BLAST hits that do not meet the length, similarity and 
#alignment criteria are discarded without being logged (Warnings = OFF).  Use Warnings=ON to log 
#these discarded pairs in the error file
#w= OFF
#Warnings = OFF

#Ignore List Gene Hits=(ON/OFF):  By default, if a list of genes to search is provided, hits between
#genes on that list will be returned (Ignore List Gene Hits=OFF).  If you only want to search for hits 
#between genes on the list and other genes (for instance to compare two genomes without returning 
#paralogs in either), use Ignore List Gene Hits=YES
#il=NO

#Checkpoint = (ON/OFF):  GenomeHistory will automatically create a .checkpoint file as it runs, 
#which gives the current gene being analyzed.  If this option is on, GenomeHistory will restart 
#from the point given in the current checkpoint file.  If no checkpoint file exists, it will start
#from the beginning.  Checkpoint file is named .GenomeHistory.chkpnt.(argfile) or .GenomeHistory.chkpnt.cmdline
#cp = on
Checkpoint = on