
    Eg9                    "   d dl mZ d dlmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZmZ d dlZd d	lmZ d d
lmZ d dlmZmZmZmZ d dlmZmZm Z  ej!        ej"        ej#        ej$        ej%        ej&        ej&        dZ'ej#        ej(        dfej&        ej)        e	fej!        ej*        dfej"        ej*        dfej$        ej*        dfej+        ej)        dfej%        ej,        d fiZ-ej*        dej(        dej)        diZ. G d de          Z/dS )    )annotations)AnyN)infer_dtype)iNaT)NoBufferPresent)cache_readonly)
ArrowDtypeBaseMaskedDtypeDatetimeTZDtype)is_string_dtype)PandasBuffer)ColumnColumnBuffersColumnNullType	DtypeKind)ArrowCTypes
Endiannessdtype_to_arrow_c_fmt)iufbUMmzThis column is non-nullablezThis column uses NaN as nullz!This column uses a sentinel valuec                      e Zd ZdZd d!d	Zd"dZed"d            Zed#d            Z	d#dZ
ed             Zed             Zed"d            Zed$d            Zd"dZd%d&dZd'dZd(dZd(dZd(dZdS ))PandasColumna  
    A column object, with only the methods and properties required by the
    interchange protocol defined.
    A column can contain one or more chunks. Each chunk can contain up to three
    buffers - a data buffer, a mask buffer (depending on null representation),
    and an offsets buffer (if variable-size binary; e.g., variable-length
    strings).
    Note: this Column object can only be produced by ``__dataframe__``, so
          doesn't need its own version or ``__column__`` protocol.
    Tcolumn	pd.Series
allow_copyboolreturnNonec                    t          |t          j                  rt          d|j         d          t          |t          j                  s t          dt          |           d          || _        || _	        dS )zu
        Note: doesn't deal with extension arrays yet, just assume a regular
        Series/ndarray for now.
        zExpected a Series, got a DataFrame. This likely happened because you called __dataframe__ on a DataFrame which, after converting column names to string, resulted in duplicated names: zD. Please rename these columns before using the interchange protocol.zColumns of type  not handled yetN)

isinstancepd	DataFrame	TypeErrorcolumnsSeriesNotImplementedErrortype_col_allow_copy)selfr   r!   s      S/var/www/sysmax/venv/lib/python3.11/site-packages/pandas/core/interchange/column.py__init__zPandasColumn.__init__K   s    
 fbl++ 	2 !.2 2 2   &"),, 	Y%&Wf&W&W&WXXX 	%    intc                    | j         j        S )z2
        Size of the column, in elements.
        )r/   sizer1   s    r2   r7   zPandasColumn.size_   s     y~r4   c                    dS )z7
        Offset of first element. Always zero.
        r    r8   s    r2   offsetzPandasColumn.offsete   s	     qr4   tuple[DtypeKind, int, str, str]c                   | j         j        }t          |t          j                  rJ| j         j        j        }|                     |j                  \  }}}}t          j	        ||t          j        fS t          |          rLt          | j                   dv r't          j        dt          |          t          j        fS t!          d          |                     |          S )N)stringempty   z.Non-string object dtypes are not supported yet)r/   dtyper'   r(   CategoricalDtypevaluescodes_dtype_from_pandasdtyper   CATEGORICALr   NATIVEr   r   STRINGr   r-   )r1   rA   rD   _bitwidthc_arrow_dtype_f_strs         r2   rA   zPandasColumn.dtypem   s    	eR011 	7I$*E ,,U[99# %#!	  U## 
	749%%)<<<$(//%	  &&VWWW//666r4   c                |   t                               |j        d          }|t          d| d          t	          |t
                    r|j        j        }nKt	          |t                    r|j	        j        }n)t	          |t                    r|j        j        }n|j        }||j        dz  t          |          |fS )z/
        See `self.dtype` for details.
        N
Data type z& not supported by interchange protocolr@   )	_NP_KINDSgetkind
ValueErrorr'   r	   numpy_dtype	byteorderr   baser
   itemsizer   )r1   rA   rP   rS   s       r2   rE   z$PandasColumn._dtype_from_pandasdtype   s     }}UZ..<W%WWWXXXeZ(( 	()3II// 	(
,II// 	()3IIIU^a')=e)D)DiOOr4   c                    | j         d         t          j        k    st          d          | j        j        j        dt          t          j	        | j        j        j
                            dS )a:  
        If the dtype is categorical, there are two options:
        - There are only values in the data buffer.
        - There is a separate non-categorical Column encoding for categorical values.

        Raises TypeError if the dtype is not categorical

        Content of returned dict:
            - "is_ordered" : bool, whether the ordering of dictionary indices is
                             semantically meaningful.
            - "is_dictionary" : bool, whether a dictionary-style mapping of
                                categorical values to other objects exists
            - "categories" : Column representing the (implicit) mapping of indices to
                             category values (e.g. an array of cat1, cat2, ...).
                             None if not a dictionary-style categorical.
        r   zCdescribe_categorical only works on a column with categorical dtype!T)
is_orderedis_dictionary
categories)rA   r   rF   r*   r/   catorderedr   r(   r,   rY   r8   s    r2   describe_categoricalz!PandasColumn.describe_categorical   sg    $ z!}	 555U  
 )-/!&ry1I'J'JKK
 
 	
r4   c                    | j         d         }	 t          |         \  }}n!# t          $ r t          d| d          w xY w||fS )Nr   rM   z not yet supported)rA   _NULL_DESCRIPTIONKeyErrorr-   )r1   rP   nullvalues       r2   describe_nullzPandasColumn.describe_null   sh    z!}	M+D1KD%% 	M 	M 	M%&K4&K&K&KLLL	M U{s     >c                |    | j                                                                                                         S )zB
        Number of null elements. Should always be known.
        )r/   isnasumitemr8   s    r2   
null_countzPandasColumn.null_count   s.    
 y~~##%%**,,,r4   dict[str, pd.Index]c                    d| j         j        iS )z8
        Store specific metadata of the column.
        zpandas.index)r/   indexr8   s    r2   metadatazPandasColumn.metadata   s    
 	00r4   c                    dS )zE
        Return the number of chunks the column consists of.
           r:   r8   s    r2   
num_chunkszPandasColumn.num_chunks   s	     qr4   Nn_chunks
int | Nonec              #     K   |rr|dk    rlt          | j                  }||z  }||z  dk    r|dz  }t          d||z  |          D ].}t          | j        j        |||z            | j                  V  /dS | V  dS )zy
        Return an iterator yielding the chunks.
        See `DataFrame.get_chunks` for details on ``n_chunks``.
        rm   r   N)lenr/   ranger   ilocr0   )r1   ro   r7   stepstarts        r2   
get_chunkszPandasColumn.get_chunks   s      
  
	1ty>>D8#Dh!##	q$/488  "IN554<#78$:J      
 JJJJJr4   r   c                    |                                  ddd}	 |                                 |d<   n# t          $ r Y nw xY w	 |                                 |d<   n# t          $ r Y nw xY w|S )a`  
        Return a dictionary containing the underlying buffers.
        The returned dictionary has the following contents:
            - "data": a two-element tuple whose first element is a buffer
                      containing the data and whose second element is the data
                      buffer's associated dtype.
            - "validity": a two-element tuple whose first element is a buffer
                          containing mask values indicating missing data and
                          whose second element is the mask value buffer's
                          associated dtype. None if the null representation is
                          not a bit or byte mask.
            - "offsets": a two-element tuple whose first element is a buffer
                         containing the offset values for variable-size binary
                         data (e.g., variable-length strings) and whose second
                         element is the offsets buffer's associated dtype. None
                         if the data buffer does not have an associated offsets
                         buffer.
        N)datavalidityoffsetsrz   r{   )_get_data_buffer_get_validity_bufferr   _get_offsets_buffer)r1   bufferss     r2   get_bufferszPandasColumn.get_buffers   s    ( ))++"
 "
	"&";";"="=GJ 	 	 	D		!%!9!9!;!;GI 	 	 	D	 s   2 
??A 
A('A(tuple[PandasBuffer, Any]c                0   | j         d         t          j        t          j        t          j        t          j        t          j        fv r| j         d         t          j        k    rPt          | j         d                   dk    r2| j        j	        
                    d                                          }n| j                                        }t          || j                  }| j         }n*| j         d         t          j        k    rB| j        j        j        }t          || j                  }|                     |j                   }n| j         d         t          j        k    r| j                                        }t'                      }|D ]@}t)          |t*                    r)|                    |                    d                     At          t1          j        |d	                    }| j         }nt5          d
| j        j          d          ||fS )zZ
        Return the buffer containing the data and the buffer's associated dtype.
        r         N)r!   utf-8encodinguint8)rA   rM   r&   )rA   r   INTUINTFLOATBOOLDATETIMErr   r/   dt
tz_convertto_numpyr   r0   rF   rC   _codesrE   rH   	bytearrayr'   strextendencodenp
frombufferr-   )r1   np_arrbufferrA   rD   bufr   objs           r2   r|   zPandasColumn._get_data_buffer  s    :a=MNON
 
 
 z!}	 222s4:a=7I7IA7M7M0066??AA++--!&T5EFFFFJEEZ]i333I$+E!%D4DEEEF00==EEZ]i...)$$&&CA  ; ;c3'' ;HHSZZZ99::: ""-"A"A"ABBF
 JEE%&T49?&T&T&TUUUu}r4   c                8   | j         \  }}| j        d         t          j        k    r| j                                        }|dk    }| }t          j        t          |          ft          j	                  }t          |          D ]!\  }}t          |t                    r|n|||<   "t          |          }t          j        dt          j        t           j        f}	||	fS 	 t$          |          d}
n# t&          $ r t)          d          w xY wt+          |
          )z
        Return the buffer containing the mask values indicating missing data and
        the buffer's associated dtype.
        Raises NoBufferPresent if null representation is not a bit or byte mask.
        r   shaperA   r@   z! so does not have a separate maskzSee self.describe_null)rb   rA   r   rH   r/   r   r   zerosrr   bool_	enumerater'   r   r   r   r   r   rG   _NO_VALIDITY_BUFFERr_   r-   r   )r1   r`   invalidr   validmaskr   r   r   rA   msgs              r2   r}   z!PandasColumn._get_validity_bufferC  s,    *g:a=I,,, )$$&&C qLEiG83s88+RX>>>D#C.. E E3#-c3#7#7D%%WQ "$''F ^Q(8*:KLE5= 	@(.QQQCC 	@ 	@ 	@%&>???	@ c"""s   C0 0D
c                   | j         d         t          j        k    r| j                                        }d}t          j        t          |          dz   ft
          j                  }t          |          D ]J\  }}t          |t                    r(|                    d          }|t          |          z  }|||dz   <   Kt          |          }t          j        dt          j        t"          j        f}nt'          d          ||fS )a  
        Return the buffer containing the offset values for variable-size binary
        data (e.g., variable-length strings) and the buffer's associated dtype.
        Raises NoBufferPresent if the data buffer does not have an associated
        offsets buffer.
        r   rm   r   r   r   @   zJThis column has a fixed-length dtype so it does not have an offsets buffer)rA   r   rH   r/   r   r   r   rr   int64r   r'   r   r   r   r   r   INT64r   rG   r   )	r1   rC   ptrr{   r   vr   r   rA   s	            r2   r~   z PandasColumn._get_offsets_bufferi  s    :a=I,,,Y''))FChc&kkAo%7rxHHHG!&)) % %1 a%% "'22A3q66MC!$A "'**F !!	EE "5  
 u}r4   )T)r   r    r!   r"   r#   r$   )r#   r5   )r#   r<   )r#   rh   )N)ro   rp   )r#   r   )r#   r   )__name__
__module____qualname____doc__r3   r7   propertyr;   r   rA   rE   r\   rb   rg   rk   rn   rw   r   r|   r}   r~   r:   r4   r2   r   r   ?   s       	 	& & & & &(       X 7 7 7 ^7:P P P P. 
 
 X
8   X - - - ^- 1 1 1 X1       "# # # #J. . . .`$# $# $# $#L& & & & & &r4   r   )0
__future__r   typingr   numpyr   pandas._libs.libr   pandas._libs.tslibsr   pandas.errorsr   pandas.util._decoratorsr   pandas.core.dtypes.dtypesr	   r
   r   pandasr(   pandas.api.typesr   pandas.core.interchange.bufferr   *pandas.core.interchange.dataframe_protocolr   r   r   r   pandas.core.interchange.utilsr   r   r   r   r   r   r   rH   r   rN   USE_NANUSE_SENTINELNON_NULLABLErF   USE_BYTEMASKr^   r   r   r:   r4   r2   <module>r      s^   " " " " " "           ( ( ( ( ( ( $ $ $ $ $ $ ) ) ) ) ) ) 2 2 2 2 2 2              , , , , , , 7 7 7 7 7 7                     
									 	 On,d34d;MN/6N^0$7N^0$7 N7<~2A6  !>:!D P P P P P6 P P P P Pr4   