U
    g(                     @   s6  U d Z ddlZddlmZmZmZmZmZmZm	Z	 ddl
mZmZmZ daeed< daeed< g aee ed	< d
Zeed< G dd deZde	eedf e	eedf e	eee df eeeee f dddZee ee ee dddZee edddZeeee ee f eee ee f eee ee f ee	eeeef f eeef eee f eedf eeeeeeeeegdf  eeeeee ee f dddZeee	eef  ee ee ee	eeeef f eeef eee f eedf eeeeeeeeeegdf  eeef dddZdS )ze
Code related to text extraction.

Some parts are still in _page.py. In doubt, they will stay there.
    N)AnyCallableDictListOptionalTupleUnion   )DictionaryObjectTextStringObjectencode_pdfdocencodingCUSTOM_RTL_MINCUSTOM_RTL_MAXCUSTOM_RTL_SPECIAL_CHARS    LAYOUT_NEW_BT_GROUP_SPACE_WIDTHSc                   @   s   e Zd ZdS )OrientationNotFoundErrorN)__name__
__module____qualname__ r   r   C/tmp/pip-unpacked-wheel-z3c9upm3/pypdf/_text_extraction/__init__.pyr      s   r   )_min_maxspecialsreturnc                 C   sv   t | tr| ant | tr"t| at |tr2|ant |trDt|at |tr^dd |D ant |trl|atttfS )a  
    Change the Right-To-Left and special characters custom parameters.

    Args:
        _min: The new minimum value for the range of custom characters that
            will be written right to left.
            If set to ``None``, the value will not be changed.
            If set to an integer or string, it will be converted to its ASCII code.
            The default value is -1, which sets no additional range to be converted.
        _max: The new maximum value for the range of custom characters that will
            be written right to left.
            If set to ``None``, the value will not be changed.
            If set to an integer or string, it will be converted to its ASCII code.
            The default value is -1, which sets no additional range to be converted.
        specials: The new list of special characters to be inserted in the
            current insertion order.
            If set to ``None``, the current value will not be changed.
            If set to a string, it will be converted to a list of ASCII codes.
            The default value is an empty list.

    Returns:
        A tuple containing the new values for ``CUSTOM_RTL_MIN``,
        ``CUSTOM_RTL_MAX``, and ``CUSTOM_RTL_SPECIAL_CHARS``.
    c                 S   s   g | ]}t |qS r   )ord.0xr   r   r   
<listcomp>=   s     z"set_custom_rtl.<locals>.<listcomp>)
isinstanceintr   strr   r   r   list)r   r   r   r   r   r   set_custom_rtl   s    





r&   )mnr   c              	   C   s   | d |d  | d |d   | d |d  | d |d   | d |d  | d |d   | d |d  | d |d   | d |d  | d |d   |d  | d |d  | d |d   |d  gS )Nr      r	         r   r   )r'   r(   r   r   r   multC   s    &&r,   )r'   r   c                 C   s8   | d dkrdS | d dk r dS | d dkr0dS dS d S )	Nr*   gư>r   gư   r)   Z     r   )r'   r   r   r   orientN   s    r0   .)text	cmtm_prevcmtm_matrix	memo_cmtmcmaporientationsoutput	font_sizevisitor_text
spacewidthr   c
                 C   sv  |d }
|d }|d }|d }|d }|d }t ||
}t ||}t|}|d |d  }|d |d  }tt|d |d  t|d |d   }|| }|}
||krtz|dkrZ|d| k r||  d d	krV|| d	 7 }|d k	r|| d	 |||d | d
} nBt||d k r@t||	| d kr@||  d dkr@| d7 } n|dkr|d| kr||  d d	kr|| d	 7 }|d k	r|| d	 |||d | d
} nBt||d k r@t||	| d kr@||  d dkr@| d7 } nB|dkr|d| kr\||  d d	kr|| d	 7 }|d k	rV|| d	 |||d | d
} nBt||d k r@t||	| d kr@||  d dkr@| d7 } n|dkr@|d| k r||  d d	kr@|| d	 7 }|d k	r|| d	 |||d | d
} nBt||d k r@t||	| d kr@||  d dkr@| d7 } W n tk
rX   Y nX | }| }
| ||
|fS )Nr   r)   r+   r   r*   r	   gr   
 g333333?    r-   g?r.   r/   )r,   r0   mathsqrtabsr   	Exceptioncopy)r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   Zcm_prevZtm_prev	cm_matrix	tm_matrixZmemo_cmZmemo_tmZm_prevr'   orientationZdelta_xZdelta_ykfr   r   r   crlf_space_checkY   s    

.








rI   )r1   operandsrD   rE   r5   r6   r7   r8   rtl_dirr9   r   c
                    sx  t ||}
t|
}||krpt|dkrpt|d trH| |d 7 } n(d}t|d trft|d n|d }t d trz| d d}W q tk
r   | d dkrdndd}Y qX nd fdd|D } fdd|D D ]~}t|d	krt	|}nd	}|d
ksrd|  kr2dksrn d|  krLdksrn d|  krfdksrn |t
kr|r||  n| | } qd|  krdksn d|  krdksn d|  krdksn t|  krtkr2n n>|s(d}|| 7 }|	d k	r$|	| || d | d} ||  } q|rfd}|| 7 }|	d k	rb|	| || d | d} | | } q| |fS )Nr   r<   surrogatepasscharmapz	utf-16-bec                    s2   g | ]*}| d  kr  d  | nt |f qS )r   )bytesdecoder   r5   r   r   r!      s     zhandle_tj.<locals>.<listcomp>c                    s(   g | ] }| d  kr  d  | n|qS )r)   r   r   rP   r   r   r!      s     r)   /   :   @   i    io   i   i!  i  i  i  i  ip  i  Tr*   F)r,   r0   lenr"   r$   r   rO   rB   joinr   r   r   r   )r1   rJ   rD   rE   r5   r6   r7   r8   rK   r9   r'   rF   tttr    Zxxr   rP   r   	handle_tj   s    

 

 

 

 

 

 

 




rX   )NNN)__doc__r?   typingr   r   r   r   r   r   r   Zgenericr
   r   r   r   r#   __annotations__r   r   r   rB   r   r$   r&   floatr,   r0   rI   boolrX   r   r   r   r   <module>   s^   $   -&
t&

