# MARC Workshop at PSC

# Essential Computing for Bioinformatics
#
# Examples by: Stuart Pomerance
# Pittsburgh Supercomputing Center


# ex. 0 
#
# open a file for reading
# not too useful, since not doing anything with the file
# closing the file
#

def example0():
    """Open ``test.txt`` for reading and immediately close it again.

    Nothing is read from the file; the example only demonstrates the
    open/close life cycle of a file handle.
    """
    # Leaving the ``with`` block closes the handle.
    with open("test.txt", "r"):
        pass

# ex. 1
#
# open a file for reading
# read a single line of the file and store it in the variable 'line'
#

def example1(pathname):
    """Print the first line of the file at ``pathname``.

    The line is printed exactly as read, so its own trailing newline is
    followed by the newline that ``print`` appends.  An empty file prints
    a single blank line.

    Args:
        pathname: path of the text file to read.
    """
    # The context manager closes the file even if reading or printing fails.
    with open(pathname, "r") as handle:
        first_line = handle.readline()
        print(first_line)

# ex. 2
#
# open a file for reading
# read the whole file as a single string and store it in the variable 'line'
#

def example2(pathname):
    """Print the entire contents of the file at ``pathname``.

    The file is read in one call as a single string and printed as is;
    ``print`` appends one extra newline after the contents.

    Args:
        pathname: path of the text file to read.
    """
    # The context manager closes the file even if reading or printing fails.
    with open(pathname, "r") as handle:
        contents = handle.read()
        print(contents)

# ex. 3
#
# read all the lines of the file and store them in a list
# print the 3rd line
#

def example3(pathname):
    """Read every line of the file into a list and print the third one.

    Args:
        pathname: path of the text file to read.

    Raises:
        IndexError: if the file has fewer than three lines.
    """
    # Close the file before indexing so a short file (IndexError below)
    # cannot leave the handle open.
    with open(pathname, "r") as handle:
        lines = handle.readlines()
    print(lines[2])

# ex. 4
#
# read 1 line at a time
# print the line number and the line
#

def example4(pathname):
    """Print every line of the file prefixed with its zero-based number.

    Each output line has the form ``<count> :  <line>``; the line keeps
    its own trailing newline, so ``print`` adds a blank line after it.

    Args:
        pathname: path of the text file to read.
    """
    with open(pathname, "r") as handle:
        # enumerate() supplies the running count, starting at 0.
        for count, line in enumerate(handle):
            print(count, ': ', line)

# ex. 5
#
# read 1 line at a time
# print only the fasta sequence header
#

def example5(pathname):
    """Print only the FASTA header lines of a file, numbered from 0.

    A header is any line whose first character is ``>``.  The count is
    the index of the header among headers, not the line number in the
    file.  Headers are printed with their trailing newline intact.

    Args:
        pathname: path of the FASTA file to read.
    """
    with open(pathname, "r") as handle:
        headers = (line for line in handle if line.startswith('>'))
        for count, line in enumerate(headers):
            print(count, ":", line)

# ex. 6
#
# read 1 line at a time
# identify the fasta sequence header and the sequence data
#

def example6(pathname):
    """Label each line of a FASTA file as header or sequence data.

    Lines starting with ``>`` are printed with the prefix ``header: ``,
    every other line (blank lines included) with the prefix ``data: ``.
    Lines are not stripped, so each keeps its trailing newline.

    Args:
        pathname: path of the FASTA file to read.
    """
    with open(pathname, "r") as handle:
        for line in handle:
            if line.startswith('>'):
                print("header: " + line)
            else:
                print("data: " + line)

# ex. 7
#
# read 1 line at a time
# identify the fasta sequence header and the sequence data
# account for blank lines and trailing space/newlines
#

def example7(pathname):
    """Label FASTA lines as header or data, ignoring blank lines.

    Trailing newlines and spaces are removed from every line first; lines
    that are empty afterwards are skipped.  Remaining lines are printed
    with the prefix ``header: `` (first character ``>``) or ``data: ``.

    Args:
        pathname: path of the FASTA file to read.
    """
    with open(pathname, "r") as handle:
        for line in handle:
            # Remove the trailing '\n' and trailing spaces.
            line = line.rstrip('\n ')

            # Nothing left on this line: move on to the next one.
            if not line:
                continue

            if line.startswith('>'):
                print("header: " + line)
            else:
                print("data: " + line)


# ex. 8
#
# read 1 line at a time
# identify the fasta sequence header and the sequence data
# account for blank lines and trailing space/newlines
# collect the sequence data
#

def example8(pathname):
    """Label FASTA lines and print the concatenated sequence data.

    Trailing newlines and spaces are removed and blank lines skipped.
    Headers (first character ``>``) are printed as ``header: ...``; every
    other line is printed as ``data: ...`` and collected.  After the
    whole file has been read, all collected data lines are printed as one
    string (data from every sequence in the file is joined together).

    Args:
        pathname: path of the FASTA file to read.
    """
    chunks = []

    with open(pathname, "r") as handle:
        for line in handle:
            # Remove the trailing '\n' and trailing spaces.
            line = line.rstrip('\n ')

            # Nothing left on this line: move on to the next one.
            if not line:
                continue

            if line.startswith('>'):
                print("header: " + line)
            else:
                print("data: " + line)
                chunks.append(line)

    # Join once at the end instead of growing a string inside the loop.
    print("".join(chunks))


# ex. 9
#
# read 1 line at a time
# identify the fasta sequence header and the sequence data
# account for blank lines and trailing space/newlines
# collect the sequence data for multiple sequences in a file
# using a list
#

def example9(pathname="test.txt"):
    """Collect the sequence data of every record in a FASTA file.

    Trailing newlines and spaces are removed and blank lines skipped.
    Each header (first character ``>``) starts a new, empty entry in a
    list; each data line is appended to the most recent entry.  Headers
    and data lines are echoed as they are read, and the final list of
    sequences is printed at the end.

    Args:
        pathname: path of the FASTA file to read.  The parameter was
            missing originally (the body referenced an undefined name);
            it defaults to ``"test.txt"`` so that calling the function
            with no arguments remains valid.

    Raises:
        IndexError: if a data line appears before the first header.
    """
    sequence_data = []

    with open(pathname, "r") as handle:
        for line in handle:
            # Remove the trailing '\n' and trailing spaces.
            line = line.rstrip('\n ')

            # Nothing left on this line: move on to the next one.
            if not line:
                continue

            if line.startswith('>'):
                print("header: " + line)
                sequence_data.append('')
            else:
                print("data: " + line)
                # Extend the sequence of the most recently seen header.
                sequence_data[-1] = sequence_data[-1] + line

    print(sequence_data)

# ex. 10
#
# read 1 line at a time
# identify the fasta sequence header and the sequence data
# account for blank lines and trailing space/newlines
# collect the sequence data for multiple sequences in a file
# using a list
# making the whole thing a function

def readFastaFile(filename):
    """Read a FASTA file into a list of ``[header, sequence]`` pairs.

    Trailing newlines and spaces are removed from every line and blank
    lines are skipped.  A line whose first character is ``>`` starts a
    new record; commas in the header are replaced by spaces.  All other
    lines are appended to the sequence of the most recent record.

    Args:
        filename: path of the FASTA file to read.

    Returns:
        A list with one two-element list ``[header, sequence]`` per
        record, in file order.  An empty file yields an empty list.

    Raises:
        IndexError: if a data line appears before the first header.
    """
    sequence_data = []

    # The context manager closes the file even if parsing raises.
    with open(filename, "r") as fasta_file:
        for line in fasta_file:
            # Remove the trailing '\n' and trailing spaces.
            line = line.rstrip('\n ')

            # Nothing left on this line: move on to the next one.
            if not line:
                continue

            if line[0] == '>':
                sequence_data.append([line.replace(',', ' '), ''])
            else:
                # Extend the sequence of the most recently seen header.
                sequence_data[-1][1] = sequence_data[-1][1] + line

    return sequence_data

def writeSequenceToFastaFile(outfile, sequence, charsPerLine=70):
    """Write a DNA/RNA or protein sequence to a file, wrapped into lines.

    The sequence is cut into consecutive pieces of ``charsPerLine``
    characters and every piece, including the last (possibly shorter)
    one, is written followed by a newline.  An empty sequence writes
    nothing.

    Args:
        outfile: object with a ``write`` method accepting strings.
        sequence: the sequence string to write.
        charsPerLine: maximum number of sequence characters per output
            line (default 70).

    Raises:
        ValueError: if ``charsPerLine`` is smaller than 1.
    """
    if charsPerLine < 1:
        raise ValueError("charsPerLine must be at least 1")

    for start in range(0, len(sequence), charsPerLine):
        outfile.write(sequence[start:start + charsPerLine] + '\n')

def translateFastaFile(infilename, outfilename):
    """Write a translated copy of every record of a FASTA file.

    For each record of ``infilename`` the header line is copied to
    ``outfilename`` unchanged, followed by the record's sequence after it
    has been passed through ``translateDNASequence`` and written with
    ``writeSequenceToFastaFile``.  Blank lines and trailing spaces in the
    input are ignored.  Sequence data that precedes the first header is
    translated and written without a header line.

    NOTE(review): ``translateDNASequence`` is not defined in this part of
    the file; the original docstring says the result is the reverse
    complement -- confirm against its definition.

    Args:
        infilename: path of the FASTA file to read.
        outfilename: path of the FASTA file to create or overwrite.
    """

    def write_record(outfile, header, chunks):
        # Emit one finished record: its header, then the translated sequence.
        if header is not None:
            outfile.write(header + '\n')
        if chunks:
            translated = translateDNASequence(''.join(chunks))
            writeSequenceToFastaFile(outfile, translated)

    header = None   # header of the record currently being collected
    chunks = []     # sequence lines of that record

    # Both files are closed when the block is left, after the whole input
    # has been processed (or if an error interrupts processing).
    with open(infilename, "r") as infile, open(outfilename, "w") as outfile:
        for line in infile:
            # Remove the trailing '\n' and trailing spaces.
            line = line.rstrip('\n ')

            # Nothing left on this line: move on to the next one.
            if not line:
                continue

            if line[0] == '>':
                # A new header ends the previous record: write it out
                # under its own header before starting the next one.
                write_record(outfile, header, chunks)
                header = line
                chunks = []
            else:
                chunks.append(line)

        # The last record has no following header to trigger its output.
        write_record(outfile, header, chunks)