
    Eg%                        d Z ddlmZ ddlZddlmZ ddlZddlm	Z	 ddl
mZ ddlmZ ddlmZmZmZmZmZ erdd	lmZmZmZ dd
lmZmZ ddlmZmZmZmZ dZ d(dZ!dde dfd)d Z"de fd*d#Z#de dfd+d%Z$de dfd,d'Z%dS )-z"
data hash pandas / numpy objects
    )annotationsN)TYPE_CHECKING)hash_object_array)is_list_like)CategoricalDtype)ABCDataFrameABCExtensionArrayABCIndexABCMultiIndex	ABCSeries)HashableIterableIterator)	ArrayLikenpt)	DataFrameIndex
MultiIndexSeries0123456789123456arraysIterator[np.ndarray]	num_itemsintreturnnpt.NDArray[np.uint64]c                   	 t          |           }n0# t          $ r# t          j        g t          j                  cY S w xY wt          j        |g|           } t          j        d          }t          j        |          t          j        d          z   }d}t          |           D ]3\  }}||z
  }||z  }||z  }|t          j        d|z   |z             z  }|}4|dz   |k    s
J d            |t          j        d          z  }|S )	z
    Parameters
    ----------
    arrays : Iterator[np.ndarray]
    num_items : int

    Returns
    -------
    np.ndarray[uint64]

    Should be the same as CPython's tupleobject.c
    )dtypeiCB ixV4 r   iXB    zFed in wrong num_itemsi| )	nextStopIterationnparrayuint64	itertoolschain
zeros_like	enumerate)	r   r   firstmultoutlast_iia	inverse_is	            M/var/www/sysmax/venv/lib/python3.11/site-packages/pandas/core/util/hashing.pycombine_hash_arraysr1   /   s%   -V - - -x"),,,,,,- _eWf--F9WD
-

8!4!4
4CF&!!  1M	qt	%)+i7888A:"""$<"""29UCJs    *??Tutf8objIndex | DataFrame | Seriesindexboolencodingstrhash_key
str | None
categorizer   c                    ddl m} t          t           t                    r |t                     dd          S t           t                    r=t           j                  	                    dd          } || dd          }nWt           t                    rzt           j                  	                    dd          }|r7 fd	d
D             }t          j        |g|          }	t          |	d          } || j        dd          }nt           t                    rfd                                 D             }
t#           j                  }|r7 fdd
D             }|dz  }t          j        |
|          }d |D             }
t          |
|          } || j        dd          }nt'          dt)                                |S )a>  
    Return a data hash of the Index/Series/DataFrame.

    Parameters
    ----------
    obj : Index, Series, or DataFrame
    index : bool, default True
        Include the index in the hash (if Series/DataFrame).
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    Series of uint64, same length as the object

    Examples
    --------
    >>> pd.util.hash_pandas_object(pd.Series([1, 2, 3]))
    0    14639053686158035780
    1     3869563279212530728
    2      393322362522515241
    dtype: uint64
    r   )r   Nr$   F)r   copyr=   )r5   r   r=   c              3  T   K   | ]"}t          j        d           j        V  #dS F)r5   r7   r9   r;   Nhash_pandas_objectr5   _values.0_r;   r7   r9   r3   s     r0   	<genexpr>z%hash_pandas_object.<locals>.<genexpr>   s^       	 	  #I%%)   	 	 	 	 	 	    N   c              3  L   K   | ]\  }}t          |j                  V  d S rI   )
hash_arrayrC   )rE   rF   seriesr;   r7   r9   s      r0   rG   z%hash_pandas_object.<locals>.<genexpr>   sK       
 
6 v~x:FF
 
 
 
 
 
rH   c              3  T   K   | ]"}t          j        d           j        V  #dS r@   rA   rD   s     r0   rG   z%hash_pandas_object.<locals>.<genexpr>   s^       	$ 	$  #I%%)   	$ 	$ 	$ 	$ 	$ 	$rH   r   c              3     K   | ]}|V  d S rI    )rE   xs     r0   rG   z%hash_pandas_object.<locals>.<genexpr>   s"      ))Aa))))))rH   zUnexpected type for hashing )pandasr   _default_hash_key
isinstancer   hash_tuplesr
   rL   rC   astyper   r%   r&   r1   r5   r   itemslencolumns	TypeErrortype)r3   r5   r7   r9   r;   r   hser
index_iterr   hashesr   index_hash_generator_hashess   ` ```         r0   rB   rB   S   s   F $#}%% 7Dvk#x::(QVWWWW	C	"	" 4Ds{Hh
CCJJ5 K 
 
 fQc>>>	C	#	# .Ds{Hh
CCJJ5 K 
 
  	/	 	 	 	 	 	 	  	 	 	J _aS*55F#FA..AfQcixeDDD	C	&	& D
 
 
 
 
 
 YY[[
 
 
 $$	 	*	$ 	$ 	$ 	$ 	$ 	$ 	$  	$ 	$ 	$  NI  of.BCCG)))))F	22fQcixeDDDBtCyyBBCCCJrH   vals+MultiIndex | Iterable[tuple[Hashable, ...]]c                D   t          |           st          d          ddlmm} t          | t                    s |j        |           n| fdt          j	                  D             }fd|D             }t          |t          |                    }|S )a  
    Hash an MultiIndex / listlike-of-tuples efficiently.

    Parameters
    ----------
    vals : MultiIndex or listlike-of-tuples
    encoding : str, default 'utf8'
    hash_key : str, default _default_hash_key

    Returns
    -------
    ndarray[np.uint64] of hashed values
    z'must be convertible to a list-of-tuplesr   )Categoricalr   c           	         g | ]=}                     j        |         t          j        |         d                     >S )F
categoriesordered)_simple_newcodesr   levels)rE   levelre   mis     r0   
<listcomp>zhash_tuples.<locals>.<listcomp>   s[       
 	 	HUO	%(8%HHH	
 	
  rH   c              3  H   K   | ]}|                     d           V  dS )Fr7   r9   r;   N)_hash_pandas_object)rE   catr7   r9   s     r0   rG   zhash_tuples.<locals>.<genexpr>   sM         	HQVWW     rH   )r   rZ   rR   re   r   rT   r   from_tuplesrangenlevelsr1   rX   )	rb   r7   r9   r   cat_valsr_   r\   re   rn   s	    ``    @@r0   rU   rU      s    $  CABBB       
 dM** #Z#D))    
 2:&&  H      F 	FCMM22AHrH   r   c                <   t          | d          st          d          t          | t                    r|                     |||          S t          | t
          j                  s%t          dt          |           j         d          t          | |||          S )a  
    Given a 1d array, return an array of deterministic integers.

    Parameters
    ----------
    vals : ndarray or ExtensionArray
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    ndarray[np.uint64, ndim=1]
        Hashed values, same length as the vals.

    Examples
    --------
    >>> pd.util.hash_array(np.array([1, 2, 3]))
    array([ 6238072747940578789, 15839785061582574730,  2185194620014831856],
      dtype=uint64)
    r   zmust pass a ndarray-likerq   z6hash_array requires np.ndarray or ExtensionArray, not z!. Use hash_pandas_object instead.)
hasattrrZ   rT   r	   rr   r"   ndarrayr[   __name___hash_ndarray)rb   r7   r9   r;   s       r0   rL   rL      s    > 4!! 42333$)** 
''Z ( 
 
 	
 dBJ'' 
FDzz"F F F
 
 	

 x:>>>rH   
np.ndarrayc                   | j         }t          j        |t          j                  r6t	          | j        |||          }t	          | j        |||          }|d|z  z   S |t          k    r|                     d          } nt          |j
        t          j        t          j        f          r,|                     d                              dd          } n(t          |j
        t          j                  rA|j        dk    r6|                     d| j         j                                       d          } n|rbdd	lm}m}m}	  |	| d
          \  }
}t)           ||          d          }|                    |
|          }|                    ||d          S 	 t/          | ||          } nQ# t0          $ rD t/          |                     t2                                        t4                    ||          } Y nw xY w| | dz	  z  } | t          j        d          z  } | | dz	  z  } | t          j        d          z  } | | dz	  z  } | S )z!
    See hash_array.__doc__.
       u8i8Fr>      ur   )re   r   	factorize)sortrg   rq      l   e9z    l   b&&&	    )r   r"   
issubdtype
complex128r|   realimagr6   rV   
issubclassr[   
datetime64timedelta64viewnumberitemsizerR   re   r   r   r   rj   rr   r   rZ   r8   objectr$   )rb   r7   r9   r;   r   	hash_real	hash_imagre   r   r   rk   rh   rs   s                r0   r|   r|     sz    JE 
}UBM** *!$)XxLL	!$)XxLL	2	>)) }}{{4  	EJ ?	@	@ yy%%d%77	EJ		*	* u~/B/Byy2TZ02233::4@@
  	          !*	$U ; ; ;E:$j0A0A5QQQE))%77C**!H +   	$T8X>>DD 	 	 	$C  ''//8 DDD	 	DBJDBI()))DDBJDBI()))DDBJDKs   F- -AG;:G;)r   r   r   r   r   r   )r3   r4   r5   r6   r7   r8   r9   r:   r;   r6   r   r   )rb   rc   r7   r8   r9   r8   r   r   )
rb   r   r7   r8   r9   r8   r;   r6   r   r   )
rb   r}   r7   r8   r9   r8   r;   r6   r   r   )&__doc__
__future__r   r%   typingr   numpyr"   pandas._libs.hashingr   pandas.core.dtypes.commonr   pandas.core.dtypes.dtypesr   pandas.core.dtypes.genericr   r	   r
   r   r   collections.abcr   r   r   pandas._typingr   r   rR   r   r   r   r   rS   r1   rB   rU   rL   r|   rP   rH   r0   <module>r      s(    # " " " " "                     2 2 2 2 2 2 2 2 2 2 2 2 6 6 6 6 6 6                               
            ' ! ! ! !L ,a a a a aL %/ / / / /h %	.? .? .? .? .?f %	9 9 9 9 9 9 9rH   