#!/usr/bin/python

######################################
# Matrix comparison significance test
# By Jacopo Cirrone and Dennis Shasha
# 
# Given two arrays, compare every pair of locations to determine whether
# they have the same relation to one another in the two arrays, i.e.
# for all i < j, evaluate v1[i] <= v1[j] and v2[i] <= v2[j].
# If they are both true or both false, then add one to the similarity score.
# Otherwise, do not.
# Then shuffle one array numshuffles times (1000 as configured below) and
# recompute the score each time.
# The fraction of shuffles scoring at least as high as the original
# arrangement is the p-value.
#

import random


input_file = "vivinput"

######################################
#
# Subroutines
#
######################################

# takes a single list and shuffles it in place with random.shuffle
# returns that same (now reordered) list object
# NOTE: this does not pool or regroup several groups;
# for an example of a shuffle with more than two groups
# see OneWayAnovaSig.py
def shuffle(myarray):
  """Randomly reorder myarray in place and return it.

  Args:
    myarray: a mutable sequence; its elements are permuted in place.

  Returns:
    The very same object that was passed in, not a copy.
  """
  # random.shuffle works in place, so the caller's list is modified.
  random.shuffle(myarray)
  return myarray

# count similar relationships greater than or equal to or 
# less than relationships
def countsim(arr1, arr2):
  """Count the index pairs that are ordered the same way in both arrays.

  For every pair of positions i < j the pair scores one point when
  arr1[i] <= arr1[j] and arr2[i] <= arr2[j] both hold, and one point when
  arr1[i] > arr1[j] and arr2[i] > arr2[j] both hold.

  Args:
    arr1: first indexable sequence of comparable values.
    arr2: second sequence, expected to have the same length as arr1.

  Returns:
    The total score as an int. If the lengths differ, a message is
    printed and 0 is returned.
  """
  size = len(arr1)
  if size != len(arr2):
    print("Array lengths are not the same.")
    return 0
  score = 0
  for i in range(size):
    left_i = arr1[i]
    right_i = arr2[i]
    for j in range(i + 1, size):
      # The two tests stay separate (rather than comparing the truth
      # values for equality) so that values which are neither <= nor >
      # each other, such as NaN, score nothing.
      both_le = left_i <= arr1[j] and right_i <= arr2[j]
      if both_le:
        score += 1
      both_gt = left_i > arr1[j] and right_i > arr2[j]
      if both_gt:
        score += 1
  return score

#  find p-value for a pair of arrays
def findpvalue(orig1, orig2, numshuffles):
  """Estimate a permutation p-value for the similarity of two arrays.

  The similarity score of (orig1, orig2) is computed with countsim. Then
  orig2 is randomly permuted numshuffles times and the score is
  recomputed against orig1 for each permutation.

  Args:
    orig1: first sequence of values.
    orig2: second sequence, expected to have the same length as orig1.
      Each shuffle is drawn with random.sample, which builds a new list,
      so the sequence passed in is not reordered.
    numshuffles: number of random permutations to draw; must be positive.

  Returns:
    The fraction of permutations whose score is at least the original
    score, as a float. If the lengths differ, a message is printed and
    1 is returned.

  Raises:
    ValueError: if numshuffles is zero or negative.
  """
  if len(orig1) != len(orig2):
     print("Array lengths are not the same.")
     return 1
  # Without this guard, zero shuffles divides by zero and a negative
  # count silently yields -0.0.
  if numshuffles <= 0:
    raise ValueError("numshuffles must be a positive integer")
  origcount = countsim(orig1, orig2)
  countbetter = 0
  for _ in range(numshuffles):
    randarr = random.sample(orig2, k=len(orig2))
    if countsim(orig1, randarr) >= origcount:
      countbetter += 1
  # float() keeps this a true division even when the script is run by an
  # interpreter where int / int truncates.
  return float(countbetter) / numshuffles
  
    
######################################
#
# Computations
#
######################################

# Number of random permutations drawn for each pair of measurements.
numshuffles = 1000

# Each non-blank line of the input file must hold a name followed by
# whitespace-separated numbers (e.g. a four by four matrix will have
# 16 numbers).
allnames = []
allmat = []
# "with" closes the file even if one of the values fails to parse.
with open(input_file, 'r') as infile:
  for line in infile:
    # split() with no argument tolerates tabs, repeated spaces and a
    # missing newline on the last line; slicing off the final character
    # instead would eat the last digit of such a line.
    x = line.split()
    if not x:
      continue  # blank line: nothing to record
    allnames.append(x[0])
    allmat.append([float(e) for e in x[1:]])

# Compare every unordered pair of measurements exactly once.
for i in range(len(allmat)):
  for j in range(i + 1, len(allmat)):
    pval = findpvalue(allmat[i], allmat[j], numshuffles)
    print("Measurements " + allnames[i] + " and " + allnames[j] + " have similarity p-value of: " + str(pval))