1 from _typeshed import Self
2 from collections.abc import Iterator
3 from typing import Any, Callable, Generic, Iterable, List, Mapping, Pattern, Tuple, Type, TypeVar, Union, overload
5 from . import BeautifulSoup
6 from .builder import TreeBuilder
7 from .formatter import Formatter, _EntitySubstitution
# Module-level constants; stub declares types only, values live in bs4.element.
DEFAULT_OUTPUT_ENCODING: str
nonwhitespace_re: Pattern[str]
whitespace_re: Pattern[str]
PYTHON_SPECIFIC_ENCODINGS: set[str]
class NamespacedAttribute(str):
    """A str subclass for an attribute name that may carry a namespace prefix."""
    # NOTE(review): signature per this stub; confirm optionality of `name` against bs4 runtime.
    def __new__(cls: Type[Self], prefix: str, name: str | None = ..., namespace: str | None = ...) -> Self: ...
class AttributeValueWithCharsetSubstitution(str):
    """Marker base class for attribute values that undergo charset substitution (see subclasses)."""
class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution):
    """Per its name, the value of a <meta> charset attribute — TODO confirm against bs4 runtime."""
    def __new__(cls, original_value): ...
    # Returns str rather than bytes, hence incompatible with str.encode.
    def encode(self, encoding: str) -> str: ...  # type: ignore # incompatible with str
class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
    """Per its name, the value of a <meta> content attribute — TODO confirm against bs4 runtime."""
    # Regex presumably used to locate the charset portion of the value; verify in implementation.
    CHARSET_RE: Pattern[str]
    def __new__(cls, original_value): ...
    # Returns str rather than bytes, hence incompatible with str.encode.
    def encode(self, encoding: str) -> str: ...  # type: ignore # incompatible with str
_PageElementT = TypeVar("_PageElementT", bound=PageElement)
# The wrapping Union[] can be removed once mypy fully supports | in type aliases.
_SimpleStrainable = Union[str, bool, None, bytes, Pattern[str], Callable[[str], bool], Callable[[Tag], bool]]
_Strainable = Union[_SimpleStrainable, Iterable[_SimpleStrainable]]
# The "normalized" variants are identical except that bytes is excluded.
_SimpleNormalizedStrainable = Union[str, bool, None, Pattern[str], Callable[[str], bool], Callable[[Tag], bool]]
_NormalizedStrainable = Union[_SimpleNormalizedStrainable, Iterable[_SimpleNormalizedStrainable]]
# NOTE(review): attribute block from PageElement's body (class header not visible in this chunk).
# Navigation pointers to neighboring elements in the parse tree; None at the edges.
previous_element: PageElement | None
next_element: PageElement | None
next_sibling: PageElement | None
previous_sibling: PageElement | None
44 parent: Tag | None = ...,
45 previous_element: PageElement | None = ...,
46 next_element: PageElement | None = ...,
47 previous_sibling: PageElement | None = ...,
48 next_sibling: PageElement | None = ...,
# Apply the given formatter (or a formatter looked up by name) to a string.
def format_string(self, s: str, formatter: Formatter | str | None) -> str: ...
# Resolve a formatter name/instance to a Formatter; return type unannotated in this stub.
def formatter_for_name(self, formatter: Formatter | str | _EntitySubstitution): ...
# camelCase counterparts of the next_sibling/previous_sibling attributes.
nextSibling: PageElement | None
previousSibling: PageElement | None
55 def stripped_strings(self) -> Iterator[str]: ...
# Concatenate all descendant strings, joined by `separator`; `types` restricts which
# NavigableString subclasses contribute; `strip` strips each piece.
def get_text(self, separator: str = ..., strip: bool = ..., types: Tuple[Type[NavigableString], ...] = ...) -> str: ...
59 def text(self) -> str: ...
# Tree-mutation API. Methods returning Self return the element operated on.
def replace_with(self: Self, *args: PageElement | str) -> Self: ...
replaceWith = replace_with  # legacy (pre-4.0) camelCase alias
def unwrap(self: Self) -> Self: ...
replace_with_children = unwrap  # aliases: replace this element with its children
replaceWithChildren = unwrap
def wrap(self, wrap_inside: _PageElementT) -> _PageElementT: ...
def extract(self: Self, _self_index: int | None = ...) -> Self: ...
def insert(self, position: int, new_child: PageElement | str) -> None: ...
def append(self, tag: PageElement | str) -> None: ...
def extend(self, tags: Iterable[PageElement | str]) -> None: ...
def insert_before(self, *args: PageElement | str) -> None: ...
def insert_after(self, *args: PageElement | str) -> None: ...
74 name: _Strainable | SoupStrainer | None = ...,
75 attrs: dict[str, _Strainable] | _Strainable = ...,
76 text: _Strainable | None = ...,
77 **kwargs: _Strainable,
78 ) -> Tag | NavigableString | None: ...
82 name: _Strainable | SoupStrainer | None = ...,
83 attrs: dict[str, _Strainable] | _Strainable = ...,
84 text: _Strainable | None = ...,
85 limit: int | None = ...,
86 **kwargs: _Strainable,
87 ) -> ResultSet[PageElement]: ...
88 findAllNext = find_all_next
89 def find_next_sibling(
91 name: _Strainable | SoupStrainer | None = ...,
92 attrs: dict[str, _Strainable] | _Strainable = ...,
93 text: _Strainable | None = ...,
94 **kwargs: _Strainable,
95 ) -> Tag | NavigableString | None: ...
96 findNextSibling = find_next_sibling
97 def find_next_siblings(
99 name: _Strainable | SoupStrainer | None = ...,
100 attrs: dict[str, _Strainable] | _Strainable = ...,
101 text: _Strainable | None = ...,
102 limit: int | None = ...,
103 **kwargs: _Strainable,
104 ) -> ResultSet[PageElement]: ...
105 findNextSiblings = find_next_siblings
106 fetchNextSiblings = find_next_siblings
109 name: _Strainable | SoupStrainer | None = ...,
110 attrs: dict[str, _Strainable] | _Strainable = ...,
111 text: _Strainable | None = ...,
112 **kwargs: _Strainable,
113 ) -> Tag | NavigableString | None: ...
114 findPrevious = find_previous
115 def find_all_previous(
117 name: _Strainable | SoupStrainer | None = ...,
118 attrs: dict[str, _Strainable] | _Strainable = ...,
119 text: _Strainable | None = ...,
120 limit: int | None = ...,
121 **kwargs: _Strainable,
122 ) -> ResultSet[PageElement]: ...
123 findAllPrevious = find_all_previous
124 fetchPrevious = find_all_previous
125 def find_previous_sibling(
127 name: _Strainable | SoupStrainer | None = ...,
128 attrs: dict[str, _Strainable] | _Strainable = ...,
129 text: _Strainable | None = ...,
130 **kwargs: _Strainable,
131 ) -> Tag | NavigableString | None: ...
132 findPreviousSibling = find_previous_sibling
133 def find_previous_siblings(
135 name: _Strainable | SoupStrainer | None = ...,
136 attrs: dict[str, _Strainable] | _Strainable = ...,
137 text: _Strainable | None = ...,
138 limit: int | None = ...,
139 **kwargs: _Strainable,
140 ) -> ResultSet[PageElement]: ...
141 findPreviousSiblings = find_previous_siblings
142 fetchPreviousSiblings = find_previous_siblings
145 name: _Strainable | SoupStrainer | None = ...,
146 attrs: dict[str, _Strainable] | _Strainable = ...,
147 **kwargs: _Strainable,
149 findParent = find_parent
152 name: _Strainable | SoupStrainer | None = ...,
153 attrs: dict[str, _Strainable] | _Strainable = ...,
154 limit: int | None = ...,
155 **kwargs: _Strainable,
156 ) -> ResultSet[Tag]: ...
157 findParents = find_parents
158 fetchParents = find_parents
160 def next(self) -> Tag | NavigableString | None: ...
162 def previous(self) -> Tag | NavigableString | None: ...
164 def next_elements(self) -> Iterable[PageElement]: ...
166 def next_siblings(self) -> Iterable[PageElement]: ...
168 def previous_elements(self) -> Iterable[PageElement]: ...
170 def previous_siblings(self) -> Iterable[PageElement]: ...
172 def parents(self) -> Iterable[Tag]: ...
174 def decomposed(self) -> bool: ...
# Legacy camelCase generator methods, counterparts of the *_elements/*_siblings/parents properties.
def nextGenerator(self) -> Iterable[PageElement]: ...
def nextSiblingGenerator(self) -> Iterable[PageElement]: ...
def previousGenerator(self) -> Iterable[PageElement]: ...
def previousSiblingGenerator(self) -> Iterable[PageElement]: ...
def parentGenerator(self) -> Iterable[Tag]: ...
181 class NavigableString(str, PageElement):
184 known_xml: bool | None
185 def __new__(cls: Type[Self], value: str | bytes) -> Self: ...
186 def __copy__(self: Self) -> Self: ...
187 def __getnewargs__(self) -> tuple[str]: ...
188 def output_ready(self, formatter: Formatter | str | None = ...) -> str: ...
190 def name(self) -> None: ...
192 def strings(self) -> Iterable[str]: ...
194 class PreformattedString(NavigableString):
197 def output_ready(self, formatter: Formatter | str | None = ...) -> str: ...
199 class CData(PreformattedString):
203 class ProcessingInstruction(PreformattedString):
207 class XMLProcessingInstruction(ProcessingInstruction):
211 class Comment(PreformattedString):
215 class Declaration(PreformattedString):
219 class Doctype(PreformattedString):
221 def for_name_and_ids(cls, name: str | None, pub_id: str, system_id: str) -> Doctype: ...
225 class Stylesheet(NavigableString): ...
226 class Script(NavigableString): ...
227 class TemplateString(NavigableString): ...
229 class Tag(PageElement):
230 parser_class: Type[BeautifulSoup] | None
232 namespace: str | None
234 sourceline: int | None
235 sourcepos: int | None
236 known_xml: bool | None
237 attrs: Mapping[str, str]
238 contents: list[PageElement]
240 can_be_empty_element: bool | None
241 cdata_list_attributes: list[str] | None
242 preserve_whitespace_tags: list[str] | None
245 parser: BeautifulSoup | None = ...,
246 builder: TreeBuilder | None = ...,
247 name: str | None = ...,
248 namespace: str | None = ...,
249 prefix: str | None = ...,
250 attrs: Mapping[str, str] | None = ...,
251 parent: Tag | None = ...,
252 previous: PageElement | None = ...,
253 is_xml: bool | None = ...,
254 sourceline: int | None = ...,
255 sourcepos: int | None = ...,
256 can_be_empty_element: bool | None = ...,
257 cdata_list_attributes: list[str] | None = ...,
258 preserve_whitespace_tags: list[str] | None = ...,
259 interesting_string_types: Type[NavigableString] | Tuple[Type[NavigableString], ...] | None = ...,
261 parserClass: Type[BeautifulSoup] | None
262 def __copy__(self: Self) -> Self: ...
264 def is_empty_element(self) -> bool: ...
265 isSelfClosing = is_empty_element
267 def string(self) -> str | None: ...
269 def string(self, string: str) -> None: ...
270 DEFAULT_INTERESTING_STRING_TYPES: Tuple[Type[NavigableString], ...]
272 def strings(self) -> Iterable[str]: ...
273 def decompose(self) -> None: ...
274 def clear(self, decompose: bool = ...) -> None: ...
275 def smooth(self) -> None: ...
276 def index(self, element: PageElement) -> int: ...
277 def get(self, key: str, default: str | list[str] | None = ...) -> str | list[str] | None: ...
278 def get_attribute_list(self, key: str, default: str | list[str] | None = ...) -> list[str]: ...
279 def has_attr(self, key: str) -> bool: ...
280 def __hash__(self) -> int: ...
281 def __getitem__(self, key: str) -> str | list[str]: ...
282 def __iter__(self) -> Iterable[PageElement]: ...
283 def __len__(self) -> int: ...
284 def __contains__(self, x: object) -> bool: ...
285 def __bool__(self) -> bool: ...
286 def __setitem__(self, key: str, value: str | list[str]) -> None: ...
287 def __delitem__(self, key: str) -> None: ...
288 def __getattr__(self, tag: str) -> Tag | None: ...
289 def __eq__(self, other: object) -> bool: ...
290 def __ne__(self, other: object) -> bool: ...
291 def __unicode__(self) -> str: ...
293 self, encoding: str = ..., indent_level: int | None = ..., formatter: str | Formatter = ..., errors: str = ...
295 def decode(self, indent_level: int | None = ..., eventual_encoding: str = ..., formatter: str | Formatter = ...) -> str: ...
297 def prettify(self, encoding: str, formatter: str | Formatter = ...) -> bytes: ...
299 def prettify(self, encoding: None = ..., formatter: str | Formatter = ...) -> str: ...
301 self, indent_level: int | None = ..., eventual_encoding: str = ..., formatter: str | Formatter = ...
# Encode only this tag's contents (not the tag itself) to bytes.
def encode_contents(self, indent_level: int | None = ..., encoding: str = ..., formatter: str | Formatter = ...) -> bytes: ...
# Legacy camelCase equivalent with the pre-4.0 parameter names.
def renderContents(self, encoding: str = ..., prettyPrint: bool = ..., indentLevel: int = ...) -> bytes: ...
307 name: _Strainable | None = ...,
308 attrs: dict[str, _Strainable] | _Strainable = ...,
309 recursive: bool = ...,
310 text: _Strainable | None = ...,
311 **kwargs: _Strainable,
312 ) -> Tag | NavigableString | None: ...
316 name: _Strainable | None = ...,
317 attrs: dict[str, _Strainable] | _Strainable = ...,
318 recursive: bool = ...,
319 text: _Strainable | None = ...,
320 limit: int | None = ...,
321 **kwargs: _Strainable,
322 ) -> ResultSet[Any]: ...
325 findChildren = find_all
327 def children(self) -> Iterable[PageElement]: ...
329 def descendants(self) -> Iterable[PageElement]: ...
# CSS selector API: select_one returns the first match or None; select returns all matches.
def select_one(self, selector: str, namespaces: Any | None = ..., **kwargs) -> Tag | None: ...
def select(self, selector: str, namespaces: Any | None = ..., limit: int | None = ..., **kwargs) -> ResultSet[Tag]: ...
# Legacy camelCase generator methods over children/descendants.
def childGenerator(self) -> Iterable[PageElement]: ...
def recursiveChildGenerator(self) -> Iterable[PageElement]: ...
# Legacy dict-style attribute membership test.
def has_key(self, key: str) -> bool: ...
337 name: _NormalizedStrainable
338 attrs: dict[str, _NormalizedStrainable]
339 text: _NormalizedStrainable
342 name: _Strainable | None = ...,
343 attrs: dict[str, _Strainable] | _Strainable = ...,
344 text: _Strainable | None = ...,
345 **kwargs: _Strainable,
# NOTE(review): these appear to be SoupStrainer methods (class header not visible in this chunk).
# markup_attrs and both return types are unannotated in this stub.
def search_tag(self, markup_name: Tag | str | None = ..., markup_attrs=...): ...
searchTag = search_tag  # legacy camelCase alias
def search(self, markup: PageElement | Iterable[PageElement]): ...
351 class ResultSet(List[_PageElementT], Generic[_PageElementT]):
# Build a ResultSet from the SoupStrainer that produced it plus the matched elements.
def __init__(self, source: SoupStrainer, result: Iterable[_PageElementT] = ...) -> None: ...