
    g+                         d dl Zd dlZddl i dddddd	d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0
Z G d1 d2ej                  e      Zy)3    N   )*   u   €   u   ‚   u   ƒ   u   „   u   …   u   †   u   ‡   u   ˆ   u   ‰   u   Š   u   ‹   u   Œ   u   Ž   u   ‘   u   ’   u   “   u   ”u   •u   –u   —u   ˜u   ™u   šu   ›u   œu   žu   Ÿ)
                              c                       e Zd Z ej                  d      Z ej                  d      Zh dZd fd	Z fdZ	d Z
d Z	 ej                  j                  j                  e_        d Z	 ej                  j$                  j                  e_        d	 Z fd
Zed        Zd Zd Zd Zd Zd Zd Zd Zd Z ej                  d      j<                  Zd Z ed        Z!ed        Z"d Z#d Z$ xZ%S # e$ r( ej                  j                  j                   e_        Y w xY w# e$ r( ej                  j$                  j                   e_        Y w xY w)_BaseHTMLProcessorz[<>'"]z&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)>   brhrcolimgwbrareabaselinkmetaembedframeinputparamtrackkeygensourcecommandisindexbasefontc                 X    |r|| _         || _        g | _        t        t        |           y N)encoding_typepiecessuperr!   __init__)selfr7   r8   	__class__s      B/var/www/html/venv/lib/python3.12/site-packages/feedparser/html.pyr;   z_BaseHTMLProcessor.__init__Y   s)    $DM
 $02    c                 8    g | _         t        t        |           y r6   )r9   r:   r!   reset)r<   r=   s    r>   rA   z_BaseHTMLProcessor.reset`   s     $-/r?   c                 l    |j                  d      }|| j                  v rd|z   dz   S d|z   dz   |z   dz   S )z=
        :type match: Match[str]
        :rtype: str
        r   < />z></>)groupelements_no_end_tag)r<   matchtags      r>   _shorttag_replacez$_BaseHTMLProcessor._shorttag_replaced   sG     kk!n$***9u$$9u$s*S00r?   c                     t         r6   NotImplementedErrorr<   is     r>   goaheadz_BaseHTMLProcessor.goaheadu       !!r?   c                     t         r6   rL   rN   s     r>   __parse_starttagz#_BaseHTMLProcessor.__parse_starttag   rQ   r?   c                     | j                  |      }| j                  dk(  r5|dkD  r0| j                  |dz
  | dk(  r| j                  | j                         |S )Napplication/xhtml+xml   z/>)"_BaseHTMLProcessor__parse_starttagr8   rawdataunknown_endtaglasttag)r<   rO   js      r>   parse_starttagz!_BaseHTMLProcessor.parse_starttag   sS    !!!$::001uac!,4##DLL1r?   c                 &   t        j                  dd|t         j                        }t        j                  d| j                  |      }|j	                  dd      }|j	                  dd      }t
        t        |   |       t
        t        | #          y)	z6
        :type data: str
        :rtype: None
        z<!((?!DOCTYPE|--|\[))z&lt;!\1z<([^<>\s]+?)\s*/>z&#39;'z&#34;"N)	resub
IGNORECASErJ   replacer:   r!   feedclose)r<   datar=   s     r>   rd   z_BaseHTMLProcessor.feed   su     vv.
D"--Pvv*D,B,BDI||GS)||GS) $,T2 $-/r?   c                    | s| S | D ci c]  \  }}|j                         | }}}|j                         D cg c]!  \  }}||dv xr |j                         xs |f# } }}| j                          | S c c}}w c c}}w )zZ
        :type attrs: List[Tuple[str, str]]
        :rtype: List[Tuple[str, str]]
        )reltype)loweritemssort)attrskvattrs_ds       r>   normalize_attrsz"_BaseHTMLProcessor.normalize_attrs   s     L -22DAq1779a<22  
1 _$27a8
 
 	

 3
s
   A6&A<c                    g }d}|r|D ]j  \  }}|j                  dd      }|j                  dd      }|j                  dd      }| j                  j                  d|      }|j                  ||f       l dj	                  d	 |D              }|| j
                  v r"| j                  j                  d||d
       y| j                  j                  d||d       y)z`
        :type tag: str
        :type attrs: List[Tuple[str, str]]
        :rtype: None
         rE   z&gt;rC   &lt;r_   z&quot;z&amp;c              3   4   K   | ]  \  }}d |d|d  yw) z="r_   N ).0keyvalues      r>   	<genexpr>z6_BaseHTMLProcessor.unknown_starttag.<locals>.<genexpr>   s!      C "5)s   rD   N)rc   bare_ampersandra   appendjoinrG   r9   )r<   rI   rm   uattrsstrattrsry   rz   s          r>   unknown_starttagz#_BaseHTMLProcessor.unknown_starttag   s     # ,
Uc62c62c84++//?sEl+, ww "( H $***KKS(;<KK39:r?   c                 ^    || j                   vr| j                  j                  d|z         yy)z5
        :type tag: str
        :rtype: None
        z</%s>N)rG   r9   r}   )r<   rI   s     r>   rY   z!_BaseHTMLProcessor.unknown_endtag   s-     d...KKw}- /r?   c           	      >   |j                         }|j                  d      rt        |dd d      }nt        |      }|t        v r;| j                  j                  dt        t        t        |               dd z         y| j                  j                  d|z         y)5
        :type ref: str
        :rtype: None
        xr   N   &#%s;)rj   
startswithint_cp1252r9   r}   hexord)r<   refrz   s      r>   handle_charrefz!_BaseHTMLProcessor.handle_charref   s     iik>>#AB$EHEGKKwS-@)A!")EEFKKw}-r?   c                     |t         j                  j                  v s|dk(  r| j                  j	                  d|z         y| j                  j	                  d|z         y)r   apos&%s;z&amp;%sN)htmlentitiesname2codepointr9   r}   )r<   r   s     r>   handle_entityrefz#_BaseHTMLProcessor.handle_entityref   sH     $--...#-KKv|,KKy3/r?   c                 :    | j                   j                  |       y)6
        :type text: str
        :rtype: None
        Nr9   r}   r<   texts     r>   handle_dataz_BaseHTMLProcessor.handle_data   s     	4 r?   c                 @    | j                   j                  d|z         y)r   z	<!--%s-->Nr   r   s     r>   handle_commentz!_BaseHTMLProcessor.handle_comment  s     	;-.r?   c                 @    | j                   j                  d|z         y)r   z<?%s>Nr   r   s     r>   	handle_piz_BaseHTMLProcessor.handle_pi  s     	7T>*r?   c                 @    | j                   j                  d|z         y)r   z<!%s>Nr   r   s     r>   handle_declz_BaseHTMLProcessor.handle_decl  s     	7T>*r?   z[a-zA-Z][-_.a-zA-Z0-9:]*\s*c                 ,   | j                   }t        |      }||k(  ry| j                  ||      }|rR|j                         }|j	                         }|t        |      z   |k(  ry|j                         |j                         fS | j                  |       y)zh
        :type i: int
        :type declstartpos: int
        :rtype: Tuple[Optional[str], int]
        )N)rX   len_new_declname_matchrF   striprj   endr   )r<   rO   declstartposrX   nmsnames           r>   
_scan_namez_BaseHTMLProcessor._scan_name'  s     ,,L6$$Wa0	A779DCF
q ::<((W%r?   c                     d| z  S )5
        :type name: str
        :rtype: str
        r   rw   r   s    r>   convert_charrefz"_BaseHTMLProcessor.convert_charref>  s     ~r?   c                     d| z  S )r   r   rw   r   s    r>   convert_entityrefz$_BaseHTMLProcessor.convert_entityrefG  s     }r?   c                 8    dj                  | j                        S )zGReturn processed HTML as a single string.

        :rtype: str
        rs   )r~   r9   )r<   s    r>   outputz_BaseHTMLProcessor.outputP  s     wwt{{##r?   c                     	 t         j                  j                  | |      S # t         j                  $ r | j	                  d       |dz   cY S w xY w)z2
        :type i: int
        :rtype: int
        rt   r   )sgmllib
SGMLParserparse_declarationSGMLParseErrorr   rN   s     r>   r   z$_BaseHTMLProcessor.parse_declarationX  sL    	%%77a@@%% 	V$Q3J	s   " )AA)NrU   )&__name__
__module____qualname__r`   compilespecialr|   rG   r;   rA   rJ   rP   r   r   __code__AttributeError	func_coderW   r\   rd   staticmethodrq   r   rY   r   r   r   r   r   r   rH   r   r   r   r   r   r   __classcell__)r=   s   @r>   r!   r!   @   s|   bjj&GRZZ BCN,30
1""A"--55>>"Q$+$6$6$E$E$N$N!0  &;8	..&0	!/+
+ %"**%CDJJ.    ${  A $..66@@A  Q &-%7%7%F%F%P%P"Qs$    %C/ )%D /*DD*EEr!   )	html.entitiesr   r`   sgmlr   r   r   objectr!   rw   r?   r>   <module>r      s  8  	   	
             !" #$ 
									7>c++V cr?   