
    EgC                    B   d dl mZ d dlZd dlmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmc mZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ erd dlmZ d dlm Z  d dl!m"Z" d-dZ# eed         dddz            	 	 	 	 	 	 d.d/d!            Z$d0d1d&Z%	 d2d3d,Z&dS )4    )annotationsN)TYPE_CHECKING)Appender)is_list_like)concat_compat)notna)
MultiIndex)concat)tile_compat)_shared_docs)
to_numeric)Hashable)AnyArrayLike)	DataFramevariablestrreturnlistc                    | ]t          |           s| gS t          |t                    r't          | t                    st	          | d          t          |           S g S )Nz7 must be a list of tuples when columns are a MultiIndex)r   
isinstancer	   r   
ValueError)arg_varsr   columnss      M/var/www/sysmax/venv/lib/python3.11/site-packages/pandas/core/reshape/melt.pyensure_list_varsr      st    H%% 	":,, 	"Z$5O5O 	"TTT   >>!	    meltzpd.melt(df, zDataFrame.melt)callerothervalueTframer   
value_namer   ignore_indexboolc                    | j         v rt          d| d          t          |d j                   }|d u}t          |d j                   }|s|r| j                             |          }n j         }||z   }	|                    |	          }
|
dk    }|                                r,d t          |	|          D             }t          d|           |r$ j        d d t          j
        |
          f          n)                                  n                                  | j                             |           _         |t           j         t                    rt           j         j                  t          t!           j         j                            k    r j         j        }nrd t#          t           j         j                            D             }nA j         j         j         j        nd	g}n%t'          |          rt          d
|d          |g} j        \  }}|t          |          z
  }i }|D ]}                     |          }t          |j        t.          j                  sH|dk    rt1          |g|z  d          ||<   U t3          |          g |j        |j                  ||<   ~t/          j        |j        |          ||<   ||z   |gz   } j        d         dk    rZt          d  j        D                       s<t1           fdt#           j        d                   D                       j        ||<   n j                            d          ||<   t?          |          D ]5\  }} j                              |          !                    |          ||<   6 "                    ||          }|stG           j$        |          |_$        |S )Nzvalue_name (z3) cannot match an element in the DataFrame columns.id_vars
value_varsc                    g | ]	\  }}||
S  r*   ).0lab	not_founds      r   
<listcomp>zmelt.<locals>.<listcomp>G   s1       &Y)  r   zFThe following id_vars or value_vars are not present in the DataFrame: c                    g | ]}d | S )	variable_r*   )r+   is     r   r.   zmelt.<locals>.<listcomp>^   s    UUUOOOUUUr   r   z	var_name=z must be a scalar.r   T)r#   )namedtype   c              3  Z   K   | ]&}t          |t          j                   o|j        V  'd S N)r   npr3   _supports_2d)r+   dts     r   	<genexpr>zmelt.<locals>.<genexpr>z   sI       & &=?Jr28$$$8& & & & & &r   c                2    g | ]}j         d d |f         S r6   )iloc)r+   r1   r!   s     r   r.   zmelt.<locals>.<listcomp>~   s(    ===!UZ1===r   Fr   )%r   r   r   get_level_valuesget_indexer_foranyzipKeyErrorr<   algosuniquecopyr   r	   lennamessetranger2   r   shapepopr3   r7   r
   typetile_valuesdtypesvaluesravel	enumerate_get_level_valuesrepeat_constructorr   index)r!   r&   r'   var_namer"   	col_levelr#   value_vars_was_not_nonelevellabelsidxmissingmissing_labelsnum_rowsKnum_cols_adjustedmdatacolid_datamcolumnsr1   results   `                     r   r   r   +   sO    U]""%: % % %
 
 	
 w	5=AAG(4!*lEMJJJ *  M229==EEME:%##F++);;== 	 *-fg*>*>  N 3"03 3   # 	!Jqqq%,s"3"334EEJJLLEE

66yAAemZ00 	5=&''3s5=3F/G/G+H+HHH =.UUU3u}?R;S;S5T5TUUU ',m&8&D""*HH 
h		 9H999::::+KHaCLL(*,E 
E 
E))C..'-22 	E 1$$#WI0A$APTUUUc

 +T']]2GLVVVc

2CDDE#JJ!ZL0H{1~# & &CH<& & & # # #====uU[^'<'<===
 

 	j "M//44jH%% I I3]44Q77>>xHHc

x88F C"5;0ABBMr   datagroupsdictdropnac                    i }g }t                      }t          t          t          |                                                              }|                                D ]q\  }}t          |          |k    rt          d           fd|D             }	t          |	          ||<   |                    |           |	                    |          }rt           j                            |                    }
|
D ]%}t          j         |         j        |          ||<   &|rt          j        t          ||d                            t"                    |D ]}t%          ||                   z                                  s fd|                                D             }                     ||
|z             S )a  
    Reshape wide-format data to long. Generalized inverse of DataFrame.pivot.

    Accepts a dictionary, ``groups``, in which each key is a new column name
    and each value is a list of old column names that will be "melted" under
    the new column name as part of the reshape.

    Parameters
    ----------
    data : DataFrame
        The wide-format DataFrame.
    groups : dict
        {new_name : list_of_columns}.
    dropna : bool, default True
        Do not include columns whose entries are all NaN.

    Returns
    -------
    DataFrame
        Reshaped DataFrame.

    See Also
    --------
    melt : Unpivot a DataFrame from wide to long format, optionally leaving
        identifiers set.
    pivot : Create a spreadsheet-style pivot table as a DataFrame.
    DataFrame.pivot : Pivot without aggregation that can handle
        non-numeric data.
    DataFrame.pivot_table : Generalization of pivot that can handle
        duplicate values for one index/column pair.
    DataFrame.unstack : Pivot based on the index values instead of a
        column.
    wide_to_long : Wide panel to long format. Less flexible but more
        user-friendly than melt.

    Examples
    --------
    >>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526],
    ...                      'team': ['Red Sox', 'Yankees'],
    ...                      'year1': [2007, 2007], 'year2': [2008, 2008]})
    >>> data
       hr1  hr2     team  year1  year2
    0  514  545  Red Sox   2007   2008
    1  573  526  Yankees   2007   2008

    >>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']})
          team  year   hr
    0  Red Sox  2007  514
    1  Yankees  2007  573
    2  Red Sox  2008  545
    3  Yankees  2008  526
    z$All column lists must be same lengthc                *    g | ]}|         j         S r*   )rO   )r+   rd   rh   s     r   r.   zlreshape.<locals>.<listcomp>   s     8883T#Y&888r   r   )r3   c                (    i | ]\  }}||         S r*   r*   )r+   kvmasks      r   
<dictcomp>zlreshape.<locals>.<dictcomp>   s#    :::DAqQ$:::r   r>   )rI   rG   nextiterrQ   itemsr   r   appendunionr   r   
differencer7   rN   rO   onesr$   r   allrV   )rh   ri   rk   rc   
pivot_colsall_colsra   targetrH   	to_concatid_colsrd   crq   s   `            @r   lreshaper      s   j EJ!eeHDfmmoo&&''((A ) )u::??CDDD8888%888	%i00f&!!!>>%((4<**84455G 3 3WT#Y.22c

 ;ws5A/00=== 	$ 	$AE%(OO#DDxxzz 	;::::EKKMM:::EUGj,@AAAr    \d+dfsepsuffixc                   dd}dd}t          |          s|g}nt          |          }| j                            |                                          rt          d          t          |          s|g}nt          |          }| |                                                                         rt          d          g }g }	|D ]H}
 || |
||          }|	                    |           |                     || |
||||                     It          |d	
          }| j        
                    |	          }| |         }t          |          d	k    r(|                    |                              |          S |                    |                                |                              ||gz             S )ax   
    Unpivot a DataFrame from wide to long format.

    Less flexible but more user-friendly than melt.

    With stubnames ['A', 'B'], this function expects to find one or more
    group of columns with format
    A-suffix1, A-suffix2,..., B-suffix1, B-suffix2,...
    You specify what you want to call this suffix in the resulting long format
    with `j` (for example `j='year'`)

    Each row of these wide variables are assumed to be uniquely identified by
    `i` (can be a single column name or a list of column names)

    All remaining variables in the data frame are left intact.

    Parameters
    ----------
    df : DataFrame
        The wide-format DataFrame.
    stubnames : str or list-like
        The stub name(s). The wide format variables are assumed to
        start with the stub names.
    i : str or list-like
        Column(s) to use as id variable(s).
    j : str
        The name of the sub-observation variable. What you wish to name your
        suffix in the long format.
    sep : str, default ""
        A character indicating the separation of the variable names
        in the wide format, to be stripped from the names in the long format.
        For example, if your column names are A-suffix1, A-suffix2, you
        can strip the hyphen by specifying `sep='-'`.
    suffix : str, default '\\d+'
        A regular expression capturing the wanted suffixes. '\\d+' captures
        numeric suffixes. Suffixes with no numbers could be specified with the
        negated character class '\\D+'. You can also further disambiguate
        suffixes, for example, if your wide variables are of the form A-one,
        B-two,.., and you have an unrelated column A-rating, you can ignore the
        last one by specifying `suffix='(!?one|two)'`. When all suffixes are
        numeric, they are cast to int64/float64.

    Returns
    -------
    DataFrame
        A DataFrame that contains each stub name as a variable, with new index
        (i, j).

    See Also
    --------
    melt : Unpivot a DataFrame from wide to long format, optionally leaving
        identifiers set.
    pivot : Create a spreadsheet-style pivot table as a DataFrame.
    DataFrame.pivot : Pivot without aggregation that can handle
        non-numeric data.
    DataFrame.pivot_table : Generalization of pivot that can handle
        duplicate values for one index/column pair.
    DataFrame.unstack : Pivot based on the index values instead of a
        column.

    Notes
    -----
    All extra variables are left untouched. This simply uses
    `pandas.melt` under the hood, but is hard-coded to "do the right thing"
    in a typical case.

    Examples
    --------
    >>> np.random.seed(123)
    >>> df = pd.DataFrame({"A1970" : {0 : "a", 1 : "b", 2 : "c"},
    ...                    "A1980" : {0 : "d", 1 : "e", 2 : "f"},
    ...                    "B1970" : {0 : 2.5, 1 : 1.2, 2 : .7},
    ...                    "B1980" : {0 : 3.2, 1 : 1.3, 2 : .1},
    ...                    "X"     : dict(zip(range(3), np.random.randn(3)))
    ...                   })
    >>> df["id"] = df.index
    >>> df
      A1970 A1980  B1970  B1980         X  id
    0     a     d    2.5    3.2 -1.085631   0
    1     b     e    1.2    1.3  0.997345   1
    2     c     f    0.7    0.1  0.282978   2
    >>> pd.wide_to_long(df, ["A", "B"], i="id", j="year")
    ... # doctest: +NORMALIZE_WHITESPACE
                    X  A    B
    id year
    0  1970 -1.085631  a  2.5
    1  1970  0.997345  b  1.2
    2  1970  0.282978  c  0.7
    0  1980 -1.085631  d  3.2
    1  1980  0.997345  e  1.3
    2  1980  0.282978  f  0.1

    With multiple id columns

    >>> df = pd.DataFrame({
    ...     'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3],
    ...     'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3],
    ...     'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
    ...     'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9]
    ... })
    >>> df
       famid  birth  ht1  ht2
    0      1      1  2.8  3.4
    1      1      2  2.9  3.8
    2      1      3  2.2  2.9
    3      2      1  2.0  3.2
    4      2      2  1.8  2.8
    5      2      3  1.9  2.4
    6      3      1  2.2  3.3
    7      3      2  2.3  3.4
    8      3      3  2.1  2.9
    >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age')
    >>> l
    ... # doctest: +NORMALIZE_WHITESPACE
                      ht
    famid birth age
    1     1     1    2.8
                2    3.4
          2     1    2.9
                2    3.8
          3     1    2.2
                2    2.9
    2     1     1    2.0
                2    3.2
          2     1    1.8
                2    2.8
          3     1    1.9
                2    2.4
    3     1     1    2.2
                2    3.3
          2     1    2.3
                2    3.4
          3     1    2.1
                2    2.9

    Going from long back to wide just takes some creative use of `unstack`

    >>> w = l.unstack()
    >>> w.columns = w.columns.map('{0[0]}{0[1]}'.format)
    >>> w.reset_index()
       famid  birth  ht1  ht2
    0      1      1  2.8  3.4
    1      1      2  2.9  3.8
    2      1      3  2.2  2.9
    3      2      1  2.0  3.2
    4      2      2  1.8  2.8
    5      2      3  1.9  2.4
    6      3      1  2.2  3.3
    7      3      2  2.3  3.4
    8      3      3  2.1  2.9

    Less wieldy column names are also handled

    >>> np.random.seed(0)
    >>> df = pd.DataFrame({'A(weekly)-2010': np.random.rand(3),
    ...                    'A(weekly)-2011': np.random.rand(3),
    ...                    'B(weekly)-2010': np.random.rand(3),
    ...                    'B(weekly)-2011': np.random.rand(3),
    ...                    'X' : np.random.randint(3, size=3)})
    >>> df['id'] = df.index
    >>> df # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
       A(weekly)-2010  A(weekly)-2011  B(weekly)-2010  B(weekly)-2011  X  id
    0        0.548814        0.544883        0.437587        0.383442  0   0
    1        0.715189        0.423655        0.891773        0.791725  1   1
    2        0.602763        0.645894        0.963663        0.528895  1   2

    >>> pd.wide_to_long(df, ['A(weekly)', 'B(weekly)'], i='id',
    ...                 j='year', sep='-')
    ... # doctest: +NORMALIZE_WHITESPACE
             X  A(weekly)  B(weekly)
    id year
    0  2010  0   0.548814   0.437587
    1  2010  1   0.715189   0.891773
    2  2010  1   0.602763   0.963663
    0  2011  0   0.544883   0.383442
    1  2011  1   0.423655   0.791725
    2  2011  1   0.645894   0.528895

    If we have many columns, we could also use a regex to find our
    stubnames and pass that list on to wide_to_long

    >>> stubnames = sorted(
    ...     set([match[0] for match in df.columns.str.findall(
    ...         r'[A-B]\(.*\)').values if match != []])
    ... )
    >>> list(stubnames)
    ['A(weekly)', 'B(weekly)']

    All of the above examples have integers as suffixes. It is possible to
    have non-integers as suffixes.

    >>> df = pd.DataFrame({
    ...     'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3],
    ...     'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3],
    ...     'ht_one': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
    ...     'ht_two': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9]
    ... })
    >>> df
       famid  birth  ht_one  ht_two
    0      1      1     2.8     3.4
    1      1      2     2.9     3.8
    2      1      3     2.2     2.9
    3      2      1     2.0     3.2
    4      2      2     1.8     2.8
    5      2      3     1.9     2.4
    6      3      1     2.2     3.3
    7      3      2     2.3     3.4
    8      3      3     2.1     2.9

    >>> l = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age',
    ...                     sep='_', suffix=r'\w+')
    >>> l
    ... # doctest: +NORMALIZE_WHITESPACE
                      ht
    famid birth age
    1     1     one  2.8
                two  3.4
          2     one  2.9
                two  3.8
          3     one  2.2
                two  2.9
    2     1     one  2.0
                two  3.2
          2     one  1.8
                two  2.8
          3     one  1.9
                two  2.4
    3     1     one  2.2
                two  3.3
          2     one  2.3
                two  3.4
          3     one  2.1
                two  2.9
    stubr   r   r   c                    dt          j        |           t          j        |           | d}| j        | j        j                            |                   S )N^$)reescaper   r   match)r   r   r   r   regexs        r   get_var_namesz#wide_to_long.<locals>.get_var_names  sK    ?RYt__?binn?f???z"*...u5566r   c                f   t          | |||                    |          |          }||         j                            t	          j        ||z             dd          ||<   	 t          ||                   ||<   n# t          t          t          f$ r Y nw xY w|
                    ||gz             S )N)r&   r'   r"   rX   r   T)r   )r   rstripr   replacer   r   r   	TypeErrorr   OverflowError	set_index)r   r   r1   jr'   r   newdfs          r   	melt_stubzwide_to_long.<locals>.melt_stub  s    !{{3''
 
 
 8<''	$*(=(=r'NNa	!%(++E!HH:}5 	 	 	D	 qA3w'''s   $A= =BBz,stubname can't be identical to a column namez3the id variables need to uniquely identify each rowr4   )axis)onN)r   r   r   r   r   r   )r   r   r   r   )r   r   r   isinrA   r   
duplicatedextendrv   r
   rx   rG   r   joinmergereset_index)r   	stubnamesr1   r   r   r   r   r   _meltedvalue_vars_flattenedr   	value_varmeltedr&   news                  r   wide_to_longr      s   \7 7 7 7( ( ( (& 	"" $K		OO		zy!!%%'' IGHHH?? CGG	!u PNOOOG B B!M"dC88	##I...yyT1aC@@AAAAG!$$$Fj##$899G
W+C
1vv{{}}Q$$V,,,yy++--!y44>>qA3wGGGr   )r   r   r   r   )NNNr    NT)r!   r   r"   r   r#   r$   r   r   )T)rh   r   ri   rj   rk   r$   r   r   )r   r   )r   r   r   r   r   r   r   r   )'
__future__r   r   typingr   numpyr7   pandas.util._decoratorsr   pandas.core.dtypes.commonr   pandas.core.dtypes.concatr   pandas.core.dtypes.missingr   pandas.core.algorithmscore
algorithmsrD   pandas.core.indexes.apir	   pandas.core.reshape.concatr
   pandas.core.reshape.utilr   pandas.core.shared_docsr   pandas.core.tools.numericr   collections.abcr   pandas._typingr   pandasr   r   r   r   r   r*   r   r   <module>r      s   " " " " " " 				                 , , , , , , 2 2 2 2 2 2 3 3 3 3 3 3 , , , , , , & & & & & & & & & . . . . . . - - - - - - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 !((((((++++++          
,v
NEU!V!V
VWW "^ ^ ^ ^ XW^BMB MB MB MB MBb BHcH cH cH cH cH cH cHr   