Source code for thompson.cantorsubset

"""
.. testsetup::

	from thompson.cantorsubset    import *
	from thompson.examples.random import random_basis, random_generators
	from thompson.word            import Word
	import random
"""

from .word       import Word, format_cantor, lambda_arguments
from .generators import Generators

__all__ = ["CantorSubset", "detailed_comparison"]

[docs]class CantorSubset(Generators):
	r"""A subclass of :class:`~thompson.generators.Generators` specifically designed to represent an open subset of the Cantor set :math:`\mathcal C`.
	We store this as a (typically sorted) list of :class:`~thompson.generators.Generators` :math:`u_1, \dots, u_n`.
	This stands for the set of all points :math:`u_i\Gamma` where :math:`\Gamma \in \{0,1\}^{\mathbb N}`.
	"""
[docs]	def is_entire_Cantor_set(self):
		self.simplify()
		return self == CantorSubset.standard_basis(self.signature)
	
[docs]	def is_empty(self):
		return len(self) == 0
	
[docs]	def status(self):
		#should really be an enumeration
		if self.is_empty():
			return -1
		elif self.is_entire_Cantor_set():
			return 1
		else:
			return 0
	
[docs]	def contract(self):
		r"""Contracts the current generating set as much as possible (without using words involving a :math:`\lambda`).
		The set should be sorted before using this method.
		
			>>> basis = random_basis(cls=CantorSubset)
			>>> basis.contract()
			>>> basis == CantorSubset.standard_basis(basis.signature)
			True
			>>> basis = CantorSubset((2,1), '01011 01010'.split())
			>>> basis.contract()
			>>> print(basis)
			[0101]
		"""
		arity = self.signature.arity
		index = 0
		while index <= len(self) - arity:
			this = self[index]
			L = len(this)
			parent = Word(this[:L-1], self.signature, preprocess=False)
			#can we contract this and the next (arity-1) elements?
			if (
				all( len(self[index+i]) == L > 1 for i in range(1, arity) )
				and
				all( self[index+i][:L-1] == parent for i in range(1, arity) )
			):
				self[index:index+arity] = [parent]
				index += 1
				#Maybe we can contract the thing we've just contracted with something previous that we couldn't contract before.
				#If parent ends in alpha n, we need to go back n-1 steps.
				if len(parent) > 1:
					#Last symbol is an alpha_n, stored as -n
					#Already gone forward one, so we go back by n steps.
						index = max(0, index + parent[-1])
			else:
				index += 1
	
[docs]	def simplify(self):
		r"""Simplifies the current Cantor subset to a normal form.
		We do this in three steps.
		Firstly we expand any :meth:`nonsimple Words <thompson.word.Word.is_simple>`.
		Second, we :meth:`contract` as much as possible (without using words involving a :math:`\lambda`).
		Last, we check to see if the set contains any pairs :math:`u, u\Gamma` and remove the latter.
		
			>>> X = CantorSubset((2,1), "0 11 10 00 111 1101 11110".split())
			>>> X.simplify(); print(X)
			[<entire Cantor set>]
			>>> X = CantorSubset((2,1), ["1"])
			>>> X.simplify(); print(X)
			[1]
		"""
		self.expand_away_lambdas()
		self.sort()
		
		index = 0
		while index < len(self) - 1:
			word = self[index]
			next_index = index + 1
			while next_index < len(self):
				next = self[next_index]
				if next[:len(word)] == word:
					next_index += 1
				else:
					break
			del self[ index+1:next_index ]
			index += 1
		
		self.contract()
	
[docs]	def __str__(self):
		r"""We use a notation introduced to us by Bleak: square brackets around a word stand for "the Cantor set underneath" its argument.
		We use the :func:`~thompson.word.format_cantor` function to display the elements of the generating set.
		
			>>> print(CantorSubset((2, 1)))
			[]
			>>> print(CantorSubset((2, 1), ["x"]))
			[<entire Cantor set>]
			>>> print(CantorSubset((2, 1), ["x a1"]))
			[0]
			>>> S = CantorSubset((2, 1), ["x a1", "x a1 a1", "x a2 a1 a1", "x a2 a1 a2"])
			>>> print(S)
			[0, 00, 100, 101]
			>>> S.simplify(); print(S)
			[0, 10]
		"""
		return "[" + ", ".join( format_cantor(word, subset=True) for word in self ) + "]"
	
[docs]	def __and__(self, other):
		r"""Computes the intersection of Cantor subsets.
		We assume the list of words is sorted before calling this function.
		
			>>> A = CantorSubset((2, 1), ["00"])
			>>> B = CantorSubset((2, 1), "01 111".split())
			>>> C = A & B
			>>> print(A, B, C)
			[00] [01, 111] []
			>>> D = CantorSubset((2, 1), ["11"])
			>>> print(D, B, D & B)
			[11] [01, 111] [111]
		
		"""
		if not isinstance(other, type(self)):
			raise TypeError("Don't know how to intersect {} and {} instances".format(
			  type(self).__name__, type(other).__name__))
		
		return type(self)(self.signature, ( x for x in self._intersect(other) ))
	
	def _intersect(self, other):
		sindex = 0
		oindex = 0
		while sindex < len(self) and oindex < len(other):
			subword, comparison = detailed_comparison(self[sindex], other[oindex]) 
			if subword:
				if comparison == 1:
					yield self[sindex]
					sindex += 1
					self, other    = other, self
					sindex, oindex = oindex, sindex
				elif comparison == -1:
					yield other[oindex]
					oindex += 1
				else:
					yield self[sindex]
					sindex += 1
					oindex += 1
			else:
				if comparison == 1:
					oindex += 1
				else:
					sindex += 1
	
[docs]	def __invert__(self):
		"""The complement. again it only works on a sorted list of leaves.
		
			>>> X = CantorSubset((2, 1), "01 1010 11".split())
			>>> print(~X)
			[00, 100, 1011]
			>>> print(~CantorSubset((2, 1), []))
			[<entire Cantor set>]
			>>> print(~CantorSubset((2, 1), ["x1"]))
			[]
			
			>>> X = random_generators(cls=CantorSubset, signature=(2,1))
			>>> X.sort();
			>>> comp = ~X
			>>> print(X & comp)
			[]
			>>> X.extend(comp); X.simplify()
			>>> print(X)
			[<entire Cantor set>]
		"""
		assert self.signature.alphabet_size == 1
		return type(self)(self.signature, ( Word(x, self.signature) for x in self._complement() ))
	 
	def _complement(self):
		#def print(*args, **kwargs): pass
		if len(self) == 0:
			yield "x1"
			return
		
		needle = [1] #the root x1
		index = 0
		while index < len(self):
			target = self[index]
			subword, comparison = detailed_comparison(needle, target)
			#print("needle:", format_cantor(needle), "target:", format_cantor(target), "subword:", subword, "comparison:", comparison)
			if subword and comparison == -1:
				#needle strictly above word
				#print("attempting to add on zeroes")
				extend_zeroes(needle, target)
				#print("extended needle:", format_cantor(needle), "target:", format_cantor(target))
				if len(target) == len(needle):
					#print("extended needle same length as target")
					comparison = 0
				else:
					assert len(target) > len(needle)
					#print("extended needle shorter than target, adding a zero")
					needle.append(-1)
					#print("extra zero on needle:", format_cantor(needle))
					subword = False
					comparison = -1
				
			if subword and comparison == 0:
				needle == target
				#print("needle == target\n===DON'T YIELD")
				needle = next_leaf(needle)
				index += 1
				#print("next target, next needle")
			elif not subword and comparison == -1:
				#print("needle not above target")
				#print("===YIELD ", format_cantor(needle))
				yield needle
				needle = next_leaf(needle)
				#print("next needle", format_cantor(needle))
			else:
				raise Exception("This shouldn't happen. Design your algorithm more carefully next time David!")
			#print("bottom of loop")
		
		while needle is not None:
			yield needle
			needle = next_leaf(needle)

def extend_zeroes(needle, target):
	start = len(needle)
	index = start
	while index < len(target) and (target[index] == -1): #alpha 1
		needle.append(-1)
		index += 1
	return index - start

def next_leaf(letters):
	while True:
		if len(letters) == 1 and letters[0] > 0: #root
			return None
		elif letters[-1] == -1: # should be > -arity here
			letters[-1] = -2
			return letters
		else:
			del letters[-1]

[docs]def detailed_comparison(self, other):
	"""Returns a tuple *(subword, comparison)* of two Booleans.
	Either one word is a subword of the other or not.
	In the former case *subword* is ``True``, otherwise *subword* is ``False``.
	
	In both cases we can decide if *self* is lexicographically smaller than *other*.
	If so, *comparison* is ``-1``. If they are equal, *comparison* is ``0``; if *self* is larger than *other* then *comparison* is ``1``.
	
		>>> for thing in ["0 00", "001 00", "101 101", "0 1", "11 00"]:
		... 	thing = thing.split()
		... 	s = Word(thing[0], (2, 1))
		... 	o = Word(thing[1], (2, 1))
		... 	print(*detailed_comparison(s, o))
		... 
		True -1
		True 1
		True 0
		False -1
		False 1
	"""
	for s, o in zip(self, other):
		result = cmp(s, o)
		if result != 0:
			return ( False, -result )
	#If we reach here, the one is a subword of the other
	return ( True, cmp(len(self), len(other)) ) 

def cmp(a, b):
	return (a > b) - (a < b)