Files
INTUIA/Programa final/spacy/__pycache__/util.cpython-312.pyc
T

714 lines
84 KiB
Plaintext
Raw Normal View History

2026-03-15 13:27:50 +00:00
Ë
=û gæãó’ddlZddlZddlZddlZddlZddlZddlZddlZddlZddl Z ddl
Z
ddl Z ddl Z ddl
Z
ddlZddlZddlZddlmZddlmZddlmZddlmZddlmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*ddl+Z+ddl,Z,ddl-Z-ddl.Z.ddl/Z/ddl+m0Z0m1Z1ddl2m3Z3dd l4m5Z5m6Z6dd
l7m8Z8m9Z9dd l:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZA ddlBZCdd l:mEZEmFZFmGZGd
dlHmIZId
dlJmKZKmCZCmLZLmMZMd
dlNmOZOmPZPmQZQd
dlRmSZSerd
dlTmUZUmVZVd
dlWmXZXmYZYd
dlZm[Z[e-j¸e-jº«Z_dZ`gd¢Zagd¢Zbed«Zde«ZfefjÏed««edjÓef«Gdd«ZjGdde/jÖ«ZkGddel«ZmGd „d!en«Zod"epd#eqfd$„Zrd"epd#e$epfd%„Zsd"epd#e(d&fd'„Ztd(epd)e(d&d#dfd*„Zud+ed#efd,„Zvd+e)epefd#e)elenffd-„Zwd.ed#efd/„Zxeo«Zyd0eyeyeyem«d1œd(e)epefd2e)d3eqfd4e)epeepfd5e)epeepfd6e)epeepfd7e)eepefe<fd#d&fd8„Zzd0eyeyeyem«d1œd(epd2e)d3eqfd4e)epeepfd5e)epeepfd6e)epeepfd7e)eepefe<fd#d&fd9„Z{dd0eyeyeyem«d:œd;ed<e$eepefd2e)d3eqfd4e)epeepfd5e)epeepfd6e)epeepfd7e)eepefe<fd#d&fd=„Z|em«d0eyeyeyd>d0d?œd7e)eepefe<fd<eepefd2e)d3eqfd4e)epeepfd5e)epeepfd6e)epeepfd@eqdAeqd#d&fdB„Z}d7e)eepefe<fd#eepeepefffdC„Z~d7e<dDe!e$epd#e'edEffdF„Zd0eyeyeyem«d1œdGe)eepfd2e)d3eqfd4e)epeepfd5e)epeepfd6e)epeepfd7e)eepefe<fd#d&fdH„Z€em«d>fd+e)epefdIeepefdJeqd#e<fdK„Zem«d>fdLepdIeepefdJeqfdM„Zd#e!epfdN„Zƒd(epd#e$epfdO„Z„ dÓdPepdQepdReqd#e$eqfdS„Z… dÓdQepdReqd#e$eqfdT„Z†dUepd#e'epepffdV„Z‡dPepd#epfdW„ZˆdQepd#e$epfdX„Z‰dPepd#eqfdY„ZŠdPepd#epfdZ„ZdPepd#e$epfd[„ZŒd\epd]epd#eqfd^„Zd+e)epefd#eepeffd_„ZŽd+e)epefd#eepeffd`„Zd(epd#eqfda„Zd(epd#efdb„Zdce>dde>dee>d#dfdf„Zdgepd#e!epfdh„Z“dd>diœdge)epe!epfdje$edkeqd#e
j(fdl„Z•ed+e)epefd#e efdm„«Zed#eeddffdn„«Z—d#eqfdo„Z˜d#eqfdp„Z™dqed#epfdr„Zšdsedted#eqfdu„Z dÔdveqdweqd#e$eKfdx„Zœdy„Zd+e)epefd#e%fdz„Zžd{ee)epe%fd#e%fd|„ZŸd{ee)epe%fd#e%fd}„Z d{ee)epe%fd#e%fd~„Z¡deepgefd#eepgeffd€„Z¢deepgefdeepefdepd#efdƒ„Z£d„eepe!elfd#eepe!elffd…„Z¤d†eepe!elfd‡epdˆepd#eepe!elffd‰„dÕdŠe¦de¦dŒe¦de$e¦d#e'e¦e¦ff
dŽ„Z§dedd#e!dfd‘„Z¨dedd#e!dfd’„Z©ekjUd“«d”„«Z«d•eepege¬ffd6eepd#e¬fd–„Z­d—e¬d˜eepee¬geffd6eepd#dfd™„Z®d•eepegeffd6eepd#eepeffdš„Z¯deepefd˜eepeegeffd6eepd#eepeffdœ„Z°d+e)epefdeepeegdffd6eepd#efdž„Z±d+e)epefdŸeepeegdffd6eepd#efd „Z²d(epd¡e)epefd#efd¢„Z³d£epd#epfd¤„Z´dLepd#epfd¥„Zµd¦eepdLepd#e'e!epe!eqffd§„Z¶d7e)eepefe<fd#e<fd¨„Z·d©eepefd#eepelffdª„Z¸d>d«œdqeepelfd¬eqd#eepeffd­„Z¹d7e<d®epfd¯„Zºd7e<d®epd°ed#dfd±„Z»gfd>d«œd²eepefd³e!epd¬eqd#e e'e!epeffd´„Z¼dµed#e!epfd¶„em«fd·e!eepe$e¾fdIeepe$e¾fd#eepe$e¾ffd¸„Z¿Gd¹„dº«ZÀd#e@fd»„ZÁd¼„ZÂdµed#eqfd½„ZÃd¾epd#eqfd¿„ZÄdÀedÂdÃd(epdÄeepdÃe!dÁeÅge#fdÅe"epefd#e dÁf dÆ„ZÆdÇ„ZÇdÈ„ZÈdÉ„ZÉdÊ„ZÊd#e¦fdË„ZËd#eepe!epffdÌ„ZÌdÍ„ZÍdÖdÎe¦dÏepd#eqfdЄZÎd×de¦dÏepdÑeqd#e¦fdÒ„ZÏy#eD$rdZCYŒywxYw)ØéN)Ú defaultdict)Úcontextmanager)ÚPath)Ú
ModuleType)Ú
TYPE_CHECKINGÚAnyÚCallableÚDictÚ GeneratorÚIterableÚIteratorÚListÚMappingÚNoReturnÚOptionalÚPatternÚSetÚTupleÚTypeÚUnionÚcast)ÚRegistryÚ
RegistryError)Ú Requirement)ÚInvalidSpecifierÚ SpecifierSet)ÚInvalidVersionÚVersion)ÚAdamÚConfigÚConfigValidationErrorÚModelÚNumpyOpsÚ OptimizerÚget_current_ops)Ú compoundingÚdecayingÚfix_random_seedé)Úabout)Ú
CudaStreamÚcupyÚimportlib_metadataÚ
is_windows)ÚOLD_MODEL_SHORTCUTSÚErrorsÚWarnings©ÚORTH)ÚLanguageÚ PipeCallable)ÚDocÚSpan)ÚVocabiìÿÿÿ)ÚcsÚdaÚdeÚelÚenÚgrcÚidÚlbÚmkÚptÚruÚsrÚtaÚth) ÚpathsÚ variablesÚsystemÚnlpÚ
componentsÚcorporaÚtrainingÚ pretrainingÚ
initializeÚspacyz)[%(asctime)s] [%(levelname)s] %(message)scóeZdZdZy)ÚENV_VARSÚSPACY_CONFIG_OVERRIDESN)Ú__name__Ú
__module__Ú __qualname__ÚCONFIG_OVERRIDES©óúKC:\Users\garci\AppData\Roaming\Python\Python312\site-packages\spacy/util.pyrRrRfsØrYrRc óŒeZdZejddd¬«Zejddd¬«Zejddd¬«Zejddd¬«Zejddd¬«Z ejdd d¬«Z
ejdd
d¬«Z ejdd d¬«Z ejdd d¬«Z
ejdd
d¬«Zejddd¬«Zejddd¬«Zejddd¬«Zejddd¬«Zejddd¬«Zejdd«Zejddd¬«Zejddd¬«Zedeefd«Zedededefd«Zedededeeeeee fffd«Z!ededede"fd«Z#y)ÚregistryrPÚ languagesT)Ú entry_pointsÚ
architecturesÚ
tokenizersÚ lemmatizersÚlookupsÚdisplacy_colorsÚmiscÚ callbacksÚbatchersÚreadersÚ
augmentersÚloggersÚscorersÚvectorsÚ factoriesÚinternal_factoriesÚmodelsÚcliÚreturncó¾g}tj|«D]9\}}|jd«rŒt|t«sŒ)|j |«Œ;t
|«S)zList all available registries.Ú_)ÚinspectÚ
getmembersÚ
startswithÚ
isinstancerÚappendÚsorted)ÚclsÚnamesÚnameÚvalues rZÚget_registry_nameszregistry.get_registry_names‡sOðˆÜ"×-¨cÖ2‰KˆD—?? '¬J°u¼hÕ,GØ ˜erYÚ
registry_nameÚ func_namecózt||«sMdj|j««xsd}ttj
j
||¬««t||«} |j|«}|S#t$|jd«r?|jdd«} |j|«cYS#tj$rYnwxYwdjt|j«j«««xsd}ttjj
|||¬««dwxYw)z,Get a registered function from the registry.ú, Únone©r{Ú availableúspacy.ú
spacy-legacy.©r{Úreg_namer„N)ÚhasattrÚjoinr}rr0ÚE892ÚformatÚgetattrÚgetruÚreplaceÚ cataloguerxÚget_allÚkeysÚE893)ryr~rrzÚregÚfuncÚ legacy_namer„s rZz registry.gets$ô
s˜MÔ—I‘I˜c×A¸6ˆEܤ§ ¡ × 2Ñ 2¸
ÐQVÐ 2Ó WÓ c˜=Óð Ø—7‘7˜9Ó%ˆDðˆ øôò Ø×# '׸J ðØŸ7™7 ;Ó/øÜ ×ÙðúàŸ ™ ¤&¨¯©«×);Ñ);Ó)=Ó">ÓIÀ6ˆÜ ×"¨]Àiðóðð 
ð úó1Á'A:Á:,D:Â'B:Â7D:Â:CÃ
D:ÃCÃA*D:cózt||«sMdj|j««xsd}ttj
j
||¬««t||«} |j|«}|S#t$|jd«r?|jdd«} |j|«cYS#tj$rYnwxYwdjt|j«j«««xsd}ttjj
|||¬««dwxYw)auFind information about a registered function, including the
module and path to the file it's defined in, the line number and the
docstring, if available.
registry_name (str): Name of the catalogue registry.
func_name (str): Name of the registered function.
RETURNS (Dict[str, Optional[Union[str, int]]]): The function info.
rrr…r†r‡N)r‰r}rr0rrÚfindrurrrxrrr“)ryr~rrzr”Ú func_inforr„s rZr™z
registry.findªs%ôs˜—I‘I˜c×A¸6ˆEܤ§ ¡ × 2Ñ 2¸
ÐQVÐ 2Ó WÓ c˜=Óð ØŸ Ó+ˆIðÐøôò Ø×# '׸J ðØŸ8™8 KÓ0øÜ ×ÙðúàŸ ™ ¤&¨¯©«×);Ñ);Ó)=Ó">ÓIÀ6ˆÜ ×"¨]Àiðóðð 
ð úr—có–t||«syt||«}|jd«r|jdd«}||vxs||vS||vS)z4Check whether a function is available in a registry.Fr…r†)r‰rrur)ryr~rr”rs rZÚhasz registry.hasÍs]ôs˜MÔÜc˜=ÓØ × Ñ  Ô +¨H°oÓFˆ Ð9 {°cÐ'9Ð ˜ÐrYN)$rTrUrVrÚcreater]r_r`rarbrcrdrerfrgrhrirjrkÚ_entry_point_factoriesrlrnroÚ classmethodrÚstrr}r r
rrÚintr™ÚboolrœrXrYrZr\r\jsHØ × Ñ  ¨+ÀDÔI€IØ$I×$ W¨oÈDÔQ€MØ!×! '¨<ÀdÔK€JØ")×" 7¨MÈÔM€K؈i×јw¨ ÀÔE€GØ&i×& wÐ0AÐPTÔU€OØ ˆ9× Ñ ˜G V¸$Ô ?€Dà × Ñ  ¨+ÀDÔI€I؈y×Ñ ¨À$ÔG€H؈i×јw¨ ÀÔE€GØ!×! '¨<ÀdÔK€J؈i×јw¨ ÀÔE€G؈i×јw¨ ÀÔE€G؈i×јw¨ ÀÔE€Gð .˜Y×-¨g°{ÐQUÔØ × Ñ  Ð*>Ó?€IðˆY×
Ñ
˜g x¸
C€FØ
ˆ)×
Ñ
˜7 E¸Ô
=€Càð 4¨¡9òóððð ð°ð¸òóðð2ð Øð Ø,/ð à
ˆc8˜E # s (™OÑ,Ñ -ò óð ðDð  ð °ð ¸ò óñ rYr\cóZeZdZdZej
dœdeddfˆfdZdZd
dZ d „Z
ˆxZ S) ÚSimpleFrozenDictzâSimplified implementation of a frozen dict, mainly used as default
function or method argument (for arguments that should default to empty
dictionary). Will raise an error if user or spaCy attempts to add to dict.
©Úerrorr¦rpNcó2t||i|¤Ž||_y)z£Initialize the frozen dict. Can be initialized with pre-defined
values.
error (str): The error message when user tries to assign to dict.
N)ÚsuperÚ__init__r¦)Úselfr¦ÚargsÚkwargsÚ __class__s €rZzSimpleFrozenDict.__init__ßsø€ô ј$Ð) &Òˆ
rYcó,t|j«©ÚNotImplementedErrorr¦)Úkeyr|s rZÚ __setitem__zSimpleFrozenDict.__setitem__èóÜ! $§*¡*Ó-rYcó,t|j«)Údefaults rZÚpopzSimpleFrozenDict.popër´rYcó,t|j«)Úothers rZÚupdatezSimpleFrozenDict.updateîr´rY) rTrUrVÚ__doc__r0ÚE095r Ú
__classcell__©r­s@rZÙs3ø„ñð
,2¯;©;ò SðÀTõò.rYcóveZdZdZej
dœdeddfˆfdZdZdZ d „Z
d
Z d Z d Z
d
ZdZˆxZS)ÚSimpleFrozenListaWrapper class around a list that lets us raise custom errors if certain
attributes/methods are accessed. Mostly used for properties like
Language.pipeline that return an immutable list (and that we don't want to
convert to a tuple to not break too much backwards compatibility). If a user
accidentally calls nlp.pipeline.append(), we can raise a more helpful error.
rpNcó,||_t||Žy)zpInitialize the frozen list.
error (str): The error message when user tries to mutate the list.
N))r­s €rZzSimpleFrozenList.__init__úsø€ð
ˆŒ
Ü
ј$ÒrYcó,t|j«©s rZrwzSimpleFrozenList.appendr´rYcó,t|j«s rZÚclearzSimpleFrozenList.clearr´rYcó,t|j«s rZÚextendzSimpleFrozenList.extendr´rYcó,t|j«s rZÚinsertzSimpleFrozenList.insert r´rYcó,t|j«s rZzSimpleFrozenList.popr´rYcó,t|j«s rZÚremovezSimpleFrozenList.remover´rYcó,t|j«s rZÚreversezSimpleFrozenList.reverser´rYcó,t|j«s rZÚsortzSimpleFrozenList.sortr´rY)rTrUrVr0ÚE927r rws@rZòsLø„ñð,2¯;©;ò  Sð ¸4õ ò.rYÚlangrpcó&|tjvS)aCheck whether a Language class is already loaded. Language classes are
loaded lazily, to avoid expensive setup code associated with the language
data.
lang (str): Two-letter language code, e.g. 'en'.
RETURNS (bool): Whether a Language class has been loaded.
)r\r])s rZÚlang_class_is_loadedrÔsð ”8× %rYcóJddl}|dk(ryg}tj|jj«D]L}|j
}|dk(r|j
d«Œ&tj|«sŒ<|j
|«ŒNtj||d¬«}|dk(ry|S)a
Given an IETF language code, find a supported spaCy language that is a
close match for it (according to Unicode CLDR language-matching rules).
This allows for language aliases, ISO 639-2 codes, more detailed language
tags, and close matches.
Returns the language code if a matching language is available, or None
if there is no matching language.
>>> find_matching_language('en')
'en'
>>> find_matching_language('pt-BR') # Brazilian Portuguese
'pt'
>>> find_matching_language('fra') # an ISO 639-2 code for French
'fr'
>>> find_matching_language('iw') # obsolete alias for Hebrew
'he'
>>> find_matching_language('no') # Norwegian
'nb'
>>> find_matching_language('mo') # old code for ro-MD
'ro'
>>> find_matching_language('zh-Hans') # Simplified Chinese
'zh'
>>> find_matching_language('zxx')
None
rxxÚmulé )Ú max_distance)
Ú
spacy.langÚpkgutilÚ iter_modulesrÒÚ__path__r{rwÚ langcodesÚ tag_is_validÚclosest_supported_match)rPÚpossible_languagesÚmodinfoÚcodeÚmatchs rZÚfind_matching_languagerå&s™ó6à ˆt‚|ØðÐÜׯ
©
×(;Ñ(;Ö<ˆØ|‰|ˆØ 4Š<à × % 
×
# 
× % 

× -¨dÐ4FÐUVÔ W€EØ àˆ rYr4có>|tjvrtjj|«S tjd|d«}t|t||jd««tjj|«S#t
$r…} t
|«}n%#tjj$rd}YnwxYw|r|}tjd|d«}n+t tjj||¬««|Yd}~ŒËd}~wwxYw)zImport and load a Language class.
lang (str): IETF language code, such as 'en'.
RETURNS (Language): Language class.
z.lang.rPN)Úerrr)r\r]Ú importlibÚ
import_moduleÚ ImportErrorråÚ
tag_parserÚLanguageTagErrorr0ÚE048rŒÚset_lang_classrÚ__all__)Úmodulerçs rZÚget_lang_classrñ]sð Œx××!×%  SÜ×,¨v°d°V¨_¸Fˆ tœW V¨V¯^©^¸AÑ->Ó × Ñ × !  'øôò
Sð
Ü.¨tÓ4øÜ×'×
àð
úñØÜ"×0°6¸$¸°À'ÓJä!¤&§+¡+×"4Ñ"4¸$ÀCÐ"4Ó"HÓIÈsÐûð
Sús6³B D B$Â#DÂ$CÃDÃCÃA DÄDr{rycóFtjj||¬«y)z™Set a custom Language class name that can be loaded via get_lang_class.
name (str): Name of Language class.
cls (Language): Language class.
)r•N)r\r]Úregister)r{rys rZ|sô 
×Ñ×Ñ ¨3ÐÕ/rYÚpathcó<t|t«r t|«S|S)zEnsure string is converted to a Path.
path (Any): Anything. If string, it's converted to Path.
RETURNS: Path or original argument.
)rvr r©s rZÚ ensure_pathr÷sô ÔÜD‹zÐàˆ rYcó:t|«}|j«rtj|«S|j |j
dz«}|j«rtj |«Sttjj|¬««)zåLoad JSON language data using the given path as a base. If the provided
path isn't present, will attempt to load a gzipped version before giving up.
path (str / Path): The data to load.
RETURNS: The loaded data.
z.gzrö) ÚexistsÚsrslyÚ read_jsonÚ with_suffixÚsuffixÚread_gzip_jsonÚ
ValueErrorr0ÚE160rŒs rZÚload_language_datarsvô  €DØ ‡{{„}ܘtÓ × Ñ ˜DŸK™K¨%Ñ 0€DØ ‡{{„}Ü×# DÓ
”V—[[×'¨TÐ
3rYcó&t|d«s2ttjj t |«¬««t
ttjtj|jj««}|jS)zpGet the path of a Python module.
module (ModuleType): The Python module.
RETURNS (Path): The path.
rU))r‰rÿr0ÚE169rŒÚreprrrÚosÚPathLikeÚsysÚmodulesrUÚ__file__Úparent)Ú file_paths rZÚget_module_pathr ¡seô 6˜<Ô œŸ×+´4¸³<ДTœ"Ÿ+™+¤s§{¡{°6×3DÑ3DÑ'E×'NÑ'NÓP€IØ × Ñ ÐrYÚvocabÚdisableÚenableÚexcludeÚconfigrr8rrrrcó|||||dœ}t|t«rv|jd«r t|j dd«««St |«r t
|fi|¤ŽSt|«j«r-tt|«fi|¤ŽSt|d«r t|fi|¤ŽS|tvr1ttjj|t|¬««ttj j|¬««)Load a model from a package or data path.
name (str): Package name or model path.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable.
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All others will be disabled.
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
RETURNS (Language): The loaded nlp object.
r
zblank:Ú)r{Úfull©r{)rvr rurÚ
is_packageÚload_model_from_packagerÚload_model_from_pathr‰r/ÚIOErrorr0ÚE941rŒÚE050)r{rrrrrs rZÚ
load_modelr±ð,ØØØØñ €FôÔØ ?‰?˜ =”> $§,¡,¨x¸Ó"<Ó  Ü*¨4Ñ:°6Ñ ‹:× Ñ Ô Ü¨T«
Ñ=°fÑ xÔ Ü# 3¨FÑ Ô”f—kk×(¨dÔ9LÈTÑ9RÐ
”&—++×$¨$Ð
0rYcóXtj|«}|j|||||¬«S)aLoad a model from an installed package.
name (str): The package name.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
pipes will be loaded but they won't be run unless you explicitly
enable them by calling nlp.enable_pipe.
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
components won't be loaded.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
RETURNS (Language): The loaded nlp object.
r
)Úload)r{rrrrrrys rZrrÛs.ô2 ×
! 
'€CØ 8‰8˜¸ÈÐY_ˆ `rY)ÚmetarrrrrÚ
model_pathr có|j«s)ttjj |¬««|s t |«}|dz }t
|d¬«}t||¬«}t||||||¬«} | j|||¬«S)aLoad a model from a data directory path. Creates Language class with
pipeline from config.cfg and then calls from_disk() with path.
model_path (Path): Model path.
meta (Dict[str, Any]): Optional model meta.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
pipes will be loaded but they won't be run unless you explicitly
enable them by calling nlp.enable_pipe.
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
components won't be loaded.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
RETURNS (Language): The loaded nlp object.
z
config.cfgT©Ú
for_overrides)Ú overrides)rrrrr )rr%)
rr0ÚE052rŒÚget_model_metaÚ dict_to_dotÚ load_configÚload_model_from_configÚ from_disk)
r!r rrrrrÚ config_pathr%rJs
rZrrøsð8 × Ñ Ô Ü”f—k‘k×(¨jРܘjÓØ˜|Ñ+€KܘF°$Ô7€IÜ
˜° Ô
:€FÜ
ØØØØØØ
ô
 €Cð =‰=˜¨WÀ ˆ JrYF)r rrrrÚ auto_fillÚvalidater-r.c
ód|vr)ttjj|¬««|d}d|vs|d€)ttjj|¬««t |d«} | j
||||||||¬«}
|
S)a%Create an nlp object from a config. Expects the full config file including
a section "nlp" containing the settings for the nlp object.
name (str): Package name or model path.
meta (Dict[str, Any]): Optional model meta.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
pipes will be loaded but they won't be run unless you explicitly
enable them by calling nlp.enable_pipe.
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
components won't be loaded.
auto_fill (bool): Whether to auto-fill config with missing defaults.
validate (bool): Whether to show config validation errors.
RETURNS (Language): The loaded nlp object.
rJ©r)rrrrr-r.r )rÿr0ÚE985rŒÚE993rñÚ from_config) rr rrrrr-r.Ú
nlp_configÚlang_clsrJs rZr*r*&ð:
ÜœŸ×+°6И€JØ
 :¨fÑ#5Ð#=ÜœŸ×+°:ИÑ1€HØ
×
Ñ
ØØØØØØØØ
ð ó €Cð €JrYcó†|jdi«j«Dcic]\}}d|vrd|vr||Œc}}Scc}}w)zÇRETURNS (List[str]): All sourced components in the original config,
e.g. {"source": "en_core_web_sm"}. If the config contains a key
"factory", we assume it refers to a component factory.
rKÚfactoryÚsource)Úitems)rr{Úcfgs rZÚget_sourced_componentsr;XsSð Ÿ L°"Ó á=‰IˆD ˜CÑ  H°¡Oð
ˆc‰ Ø ðùó s¤=Ú dot_names.cóôi}g}g}|D]}||j|«Œ|jd«d}||vrStj||«rtj d||i«d}ntj ||«}|||< |jt ||««ŒŸ|r
t||¬«t|«S#t $r+d|}|j|jd«|dœ«YŒíwxYw)a:Resolve one or more "dot notation" names, e.g. corpora.train.
The paths could point anywhere into the config, so we don't know which
top-level section we'll be looking within.
We resolve the whole top-level section, although we could resolve less --
we could find the lowest part of the tree.
ú.rrznot a valid section reference: )ÚlocÚmsg)rÚerrors) rwÚsplitr\Ú
is_promiseÚresolveÚ
dot_to_objectÚKeyErrorr!Útuple) rr<ÚresolvedÚoutputrAr{ÚsectionÚresultr@s rZÚresolve_dot_namesrLfsð€HØ€FØ
€FÛˆØ ˆ<Ø M‰M˜$Õ à—j‘j “o (ˆ˜×& v¨g¡Ô%×-¨x¸À¹Ð.IÓJÈ8ÑT‘Fä%×-¨f°W©oÓ>FØ$*˜Ñ
DØ
œm¨H°dÓñ$Ü#¨6¸ øô ò
DØ7¸°vÐ>Ø
 d§j¡j°£o¸cÑ
Dús CÃ1C7Ã6C7Ú init_filec ót|«j}t|«}|dd|dd|d}||z } |j«s)t t
j j| ¬««t| ||||||¬«S)a‰Helper function to use in the `load()` method of a model package's
__init__.py.
vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
a new Vocab object will be created.
disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
pipes will be loaded but they won't be run unless you explicitly
enable them by calling nlp.enable_pipe.
enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
components won't be loaded.
config (Dict[str, Any] / Config): Config overrides as nested dict or dict
keyed by section values in dot notation.
RETURNS (Language): The loaded nlp object.
rrr{úversionrö)rr rrrr) rr
r'rr0r&r)
rMrrrrrr!r Údata_dirÚ data_paths
rZÚload_model_from_init_pyrSs•ô2i“×'€JÜ ˜ %€DØv,˜q  f¡ ¨a°°Y±Ð/@ÐA€HؘXÑ%€IØ × Ñ Ô Ü”f—k‘k×(¨iÐ ØØØ
ØØØØô ðrYr%Ú interpolatecóTt|«}tt¬«}t|«dk(r0|j t
j j«||¬«S|r|j«s*ttjj|d¬««|j|||¬«S)Load a config file. Takes care of path validation and section order.
path (Union[str, Path]): Path to the config file or "-" to read from stdin.
overrides: (Dict[str, Any]): Config overrides as nested dict or
dict keyed by section values in dot notation.
interpolate (bool): Whether to interpolate and resolve variables.
RETURNS (Config): The loaded config.
©Ú
section_orderrO©r%rTz config file©r{)r ÚCONFIG_SECTION_ORDERr Úfrom_strrÚstdinÚreadÚis_filerr0ÚE053rŒr+)r%rTr,rs rZr)r)µô˜dÓ#€KÜ
Ô"6Ô
7€FÜ
ˆ;Ó˜3ÒØÜ I‰IN‰NÓ ¨ ¸{ðó
ð
ñ +×"5Ñ"5Ô"7Üœ&Ÿ+™+×,°+ÀMÐ ×ÑØ  9¸+ð ó
ð
rYÚtextcóFtt¬«j|||¬«S)zéLoad a full config from a string. Wrapper around Thinc's Config.from_str.
text (str): The string config to load.
interpolate (bool): Whether to interpolate and resolve variables.
RETURNS (Config): The loaded config.
rVrX)r rZr[)r`r%rTs rZÚload_config_from_strrbÐs)ô Ô 4Ô 5×  ˜ ¨{ð  ðrYcólttjj«j ««S)z~List all model packages currently installed in the environment.
RETURNS (List[str]): The string names of the models.
)Úlistr\rnrrrXrYrZÚget_installed_modelsreÞs%ô
×)× 1rYcó` tj|«S#tj$rYywxYw)zâGet the version of an installed package. Typically used to get model
package versions.
name (str): The name of the installed Python package.
RETURNS (str / None): The version or None if package not installed.
N)r-rPÚPackageNotFoundErrorrs rZÚget_package_versionrhæs0ðÜ)¨$Ó/øÜ × Ùðús -¬-rPÚ
constraintÚ prereleasescó |dj«rd|} t|«}t|«}||_||vS#ttf$rYywxYw)a
Check if a version (e.g. "2.0.0") is compatible given a version
constraint (e.g. ">=1.9.0,<2.2.1"). If the constraint is a specific version,
it's interpreted as =={version}.
version (str): The version to check.
constraint (str): The constraint string.
prereleases (bool): Whether to allow prereleases. If set to False,
prerelease versions will be considered incompatible.
RETURNS (bool / None): Whether the version is compatible, or None if the
version or constraint are invalid.
rú==N)Úisdigitrrrrrj)rPrirjÚspecs rZÚis_compatible_versionroósfð!ÑÔØ˜*˜Ð
ðܘ'ˆÜ˜"ˆð#€DÔØ dˆ?Ðøô
œnÐ Ùðúsš;»A
Á A
cóP|dj«ry t|«}||_|Dcgc]}|Œ}}t |«dk(r|dj
dvryt
d|D««ryt
d|D««}t
d|D««}|r|ryy#t$rYywxYwcc}w) NrFr)©ú>=Tc3ó8K|]}|jdvŒy­w)rlÚoperator©Ú.0Úsps rZú <genexpr>z+is_unconstrained_version.<locals>.<genexpr>sèø€Ð
1©5 Rˆ2;‰;˜4Ô ©5ùóc3ó8K|]}|jdvŒy­w))ú<z<=Nrurws rZrzz+is_unconstrained_version.<locals>.<genexpr>óèø€Ð°2B—K‘K ;Ôùr{c3ó8K|]}|jdvŒy­w)rqNrurws rZrzz+is_unconstrained_version.<locals>.<genexpr> r~r{)rmrrrjÚlenrvÚany)rirjrnryÚspecsÚ has_upperÚ has_lowers rZÚis_unconstrained_versionr…
ð!ÑÔØðܘJÓð#€DÔÙÓ ™$BŠR˜$€EÐ ä
ˆ5ƒzQ‚˜5 ™8× Ñä
Ñ
1©5Ó
ÜÑÓ?€IÜÑÓ?€Iá‘YØà øô! òÙðüò
s B¬ B#Â B ÂB Ú requirementcóZt|«}|jt|j«fS)z@Split a requirement like spacy>=1.2.3 into ("spacy", ">=1.2.3").)rr{r Ú specifier)r†Úreqs rZÚsplit_requirementrŠ(s$ä

"€CØ H‰H”c˜#Ÿ-™-Ó )rYcóVt|«j}d|d|dd|ddzdS)z_Generate a version range like >=1.2.3,<1.3.0 based on a given version
(e.g. of spaCy).
rsz,<rr>r)z.0)rÚrelease)rPs rZÚget_minor_version_ranger.s<ô×&€GØ
ˆy˜˜7 1™:˜, a¨°©
°Q©Ð'7°rÐ :rYcó‚ t|«}|D]}|jdvsŒ|jcS y#t$rYywxYw)z>From a version range like >=1.2.3,<1.3.0 return the lower pin.)rsrlz~=N)rrvrPÚ Exception)riÚspecsetrns rZÚget_model_lower_versionr6sNð
ܘzÓÛˆDØ}‰}Ð 2Ò—|‘|Òð
øô ò
Ø Ø ð
ús2¡
2¯2² >½>có,t|«jS)zªCheck whether a version is a prerelease version.
version (str): The version, e.g. "3.0.0.dev1".
RETURNS (bool): Whether the version is a prerelease version.
)rÚ
is_prerelease©rPs rZÚis_prerelease_versionr•Bsô  × )rYcó,t|«jS)z©Generate the base version without any prerelease identifiers.
version (str): The version, e.g. "3.0.0.dev1".
RETURNS (str): The base version, e.g. "3.0.0".
)rÚ base_versionr”s rZÚget_base_versionr˜Ksô  × (rYcóz t|«}|jd|jS#ttf$rYywxYw)zºGet the major + minor version (without patch or prerelease identifiers).
version (str): The version.
RETURNS (str): The major + minor version or None if version is invalid.
Nr>)rÚ TypeErrorrÚmajorÚminor)rPÚvs rZÚget_minor_versionržTsEð Ü  ˆðg‰gˆYŸ !øô
”~Ð Ùðús (¨:¹:Ú version_aÚ version_bcóPt|«}t|«}|duxr |duxr||k(S)aNCompare two versions and check if they match in major and minor, without
patch or prerelease identifiers. Used internally for compatibility checks
that should be insensitive to patch releases.
version_a (str): The first version
version_b (str): The second version.
RETURNS (bool): Whether the versions match.
N))r Úbs rZÚis_minor_version_matchr¤as4ô ˜)Ó$€Aܘ)Ó$€AØ Dˆ=Ò 5˜Q d˜ 5¨q°A©vÐ5rYcó"t|«}|jj«s3ttj
j
|j¬««|j«r|j«s4ttjj
|jd¬««tj|«}dD]4}||vs||rŒ
ttjj
|¬««d|vrttj|d«s„t!|d«}t#|«}|d|z}nd|vr d |dz}nd
}t$j&j
|d d |d
|d|tj¬«}t)j*|«t-|d«r`t$j.j
|d d |d
|d|dt1tj«¬«}t)j*|«|S)z¢Load a model meta.json from a path and validate its contents.
path (Union[str, Path]): Path to meta.json.
RETURNS (Dict[str, Any]): The loaded meta.
ú meta.jsonrY)r{rP)ÚsettingÚ
spacy_versionrÚspacy_git_versionz git commit zversion unknownrÒrrr{rP)ÚmodelÚ
model_versionrPÚcurrent)rPÚexample)r
rr0r&r^r_rÿÚE054ror*Ú __version__rr˜r1ÚW095ÚwarningsÚwarnr…ÚW094r)r Ú
lower_versionÚwarn_msgs rZÚ load_metar¶os×ô  €DØ ;‰;× Ñ Ô Ü”f—k‘k×(¨d¯k©kÐ ;‰;Œ= § ¡ ¤Ü”f—k‘k×(¨d¯k©kÀ Ð ?‰?˜4Ó €DÛØ ˜$Ñ  d¨7£mÜœVŸ[™[×Ð ˜$ÒÜ$¤U×%6Ñ%6¸¸_Ñ8MÔ3°D¸Ñ4IÓJˆMÜ,¨]Ó;ˆMØÐ(Ø # mÑ 3
ØÑ,Ø -°Ð5HÑ0IÑ I
à 1
Ü—}‘}טf™˜ ¨V©  " 9™oØ×ˆHô
M‰M˜ # Ñ$9Ô —}‘}טf™˜ ¨V©  ~Ð" 9™oؘ/´×0AÑ0AÓˆ
M‰M˜ €KrYcó4t|«}t|dz «S)z¯Get model meta.json from a directory path and validate its contents.
path (str / Path): Path to model directory.
RETURNS (Dict[str, Any]): The model's meta data.
))r!s rZr'r'šsô ˜"€JÜ Z  .rYcó> tj|«y#YyxYw)z˜Check if string maps to a package installed via pip.
name (str): Name of package.
RETURNS (bool): True if installed package, False if not.
TF)r-Ú distributionrs rZrr¤s"ð Ü×ÔøðÙús˜có´tj|«}tttt
t jf|j««jS)zxGet the path to an installed package.
name (str): Package name.
RETURNS (Path): Path to installed package.
)
rrrr rrr r
)r{Úpkgs rZÚget_package_pathr¼±s>ô ×
! 
'€CÜ ”Uœ3¤§ ¡ Ð,¨c¯l©lÓ <× CrYÚtargetÚ replacementcó4|j«D]9}||jvsŒ||j|jj|«<Œ;|j«D]9}|jD](}|j |«|usŒ|j ||«Œ*Œ;y)zÊReplace a node within a model with a new one, updating refs.
model (Model): The parent model.
target (Model): The target node.
replacement (Model): The node to replace the target with.
N)ÚwalkÚlayersÚindexÚ ref_namesÚ
maybe_get_refÚset_ref)ÚnodeÚref_names rZÚreplace_model_noderȽs~ð
ˆØ T—[‘[Ò Ø5@ˆDK‰K˜Ÿ ×)¨&Ó ð
ˆØŸœˆHØ×! (Ó+¨vÒ ˜X rYÚcommandcó:tj|t ¬«S)zSplit a string command using shlex. Handles platform compatibility.
command (str) : The command to split
RETURNS (List[str]): The split command.
)Úposix)ÚshlexrBr.)s rZÚ
split_commandrÍÏsô
;‰;w¬*  5rY)r\Úcapturer\c óÐt|t«rt|«}|}n|}dj|«} t j
|t jj«|dd|rtjnd|rtjnd¬«}|jdk7r_|r]d|d }|d
|jz
}|j |d z
}||j z
}t j"|«}||_||_||jdk7rt)j*|j«|S#t$r/ttjj||d¬««dwxYw) Run a command on the command line as a subprocess. If the subprocess
returns a non-zero exit code, a system exit is performed.
command (str / List[str]): The command. If provided as a string, the
string will be split using shlex.split.
stdin (Optional[Any]): stdin to read from or None.
capture (bool): Whether to capture the output and errors. If False,
the stdout and stderr will not be redirected, and if there's an error,
sys.exit will be called with the return code. You should use capture=False
when you want to turn over execution to the command, and capture=True
when you want to run the command more like a function.
RETURNS (Optional[CompletedProcess]): The process object.
ú Úutf8FN)ÚenvÚinputÚencodingÚcheckÚstdoutÚstderrr)Ú str_commandÚtoolzError running command:
z
zSubprocess exited with status z$
Process log (stdout and stderr):
)rvr Ú
subprocessÚrunrÚenvironÚcopyÚPIPEÚSTDOUTÚFileNotFoundErrorr0ÚE970rŒÚ
returncoderÖÚSubprocessErrorÚretrÉrÚexit)r\Úcmd_listÚcmd_strräÚmessager¦s rZÚ run_commandré×sOô$'œ3ÔÜ  ÓØàˆØ—(‘(˜7ÓðÜn‰nØ Ü
ÓØØÙ&-”:—??°4Ù(/”:×$°Tô
ˆð ‡~Ò™wØ.¨w¨i°tÐ<ˆØÐ3°C·N±NÐ3CÐØ :‰:Ð Ð BˆGØ s—z‘zÑ !ˆGÜ×*¨7ÓØˆŒ ؈Œ
؈ Ø ˜1Ò Ü Ô Ø €Jøô% òô Ü K‰K× Ñ ¨7¸À!¹Ð Ó 
àð ðús ³AD-Ä-8E%c#ó.Ktj«}t|«j«}tjt |«« |tjt |««y#tjt |««wxYw­w)aUChange current working directory and returns to previous on exit.
path (str / Path): The directory to navigate to.
YIELDS (Path): The absolute path to the current working directory. This
should be used if the block needs to perform actions within the working
directory, to prevent mismatches with relative paths.
N)rÚcwdrDrÚchdirr )Úprev_cwdr¬s rZÚ working_dirrîs_èø€ôx‰x‹z€HÜ4‹j× Ñ Ó"€G܇HHŒSð ØŠ
ä
X“ÕøŒX“Õüs‚A BÁA2ÁBÁ2 BÂBc#ó’Kttj««}|d} tjdk\r!t j t|«|¬«yt j t|«|¬«y#t$r>}tjtjj||¬««Yd}~yd}~wwxYw­w)z¬Execute a block in a temporary directory and remove the directory and
its contents at the end of the with block.
YIELDS (Path): The path of the temp directory.
có\tj|tj«||«y)rÚchmodÚstatÚS_IWRITE)ÚrmfuncrôÚexs rZÚ force_removez"make_tempdir.<locals>.force_remove*sÜ
”t—}‘}Ôˆt rY)éé )Úonexc)Úonerror)Údirr@N)rÚtempfileÚmkdtemprÚ version_infoÚshutilÚrmtreer ÚPermissionErrorr±r1ÚW091rŒ)ÚdröÚes rZÚ make_tempdirrèø€ô
ŒX×
Ñ
Ó
Ó €AØ
‚Gòð × Ñ ˜ M‰Mœ#˜a Ö M‰Mœ#˜a&¨,Ö 7øÜ ò
”h—m‘m×*¨q°aÐ9ûð:üs:%C¨3A=ÁCÁ A=Á<CÁ= CÂ4B?Â:CÂ?CÃCcóÖ t«jjdk(ryt«jjdk(ry ddl}y#t$rYŒwxYw#t $rYywxYw)zÎCheck if user is running spaCy from a Jupyter or Colab notebook by
detecting the IPython kernel. Mainly used for the displaCy visualizer.
RETURNS (bool): True if in Jupyter/Colab, False if not.
ÚZMQInteractiveShellTzgoogle.colab._shellrNF)Ú get_ipythonr­rTrUÚ NameErrorÚ google.colabrê)Úgoogles rZÚ
is_in_jupyterr 7svð
Ü =× "× +Ð/DÒ Ü =× "× -Ð1FÒ ð Gð

Ûàøô
ò
Ù ð
ûô ò
Ø Ø ð
ús(!A
¤!A
ÁAÁ
AÁAÁ A(Á'A(cóFttd«xsttd«S)z³Check if user is running spaCy from an interactive Python
shell. Will return True in Jupyter notebooks too.
RETURNS (bool): True if in interactive mode, False if not.
Úps1Úps2)r‰rrXrYrZÚis_in_interactiverOsô ”3˜Ó Ò 5¤'¬#¨uÓ"5Ð5rYÚobjcót|d«r|j |jSt|d«r |jSt|d«r,t|jd«r|jjSt |«S)z¹Get a human-readable name of a Python object, e.g. a pipeline component.
obj (Any): The Python object, typically a function or class.
RETURNS (str): A human-readable name.
r{rTr­)r‰r{rTr­r)rs rZÚget_object_namerXseô ˆs §¡Ð 4Øx‰xˆÜˆsØ|‰|Ð܈s ¤W¨S¯]©]¸JÔ%GØ}‰}× rYÚfunc1Úfunc2cóTt|«r t|«syt|d«r t|d«sy|j|jk(}tj|«tj|«k(}tj
|«tj
|«k(}|xr|xr|S)Approximately decide whether two functions are the same, even if their
identity is different (e.g. after they have been live reloaded). Mostly
used in the @Language.component and @Language.factory decorators to decide
whether to raise if a factory already exists. Allows decorator to run
multiple times with the same function.
func1 (Callable): The first function.
func2 (Callable): The second function.
RETURNS (bool): Whether it's the same function (most likely).
FrV)Úcallabler‰rVrsÚgetfileÚgetsourcelines)rrÚ same_nameÚ same_fileÚ same_codes rZÚ is_same_funcrgô EŒ?¤(¨5¤/ØÜ 5˜.Ô ¸ÀÔ1OØØ×" e×&8Ñ&8Ñ8€IÜ Ó&¬'¯/©/¸%Ó*@Ñ@€IÜ×& -´×1GÑ1GÈÓ1NÑN€IØ Ò 0˜Ò 0 0rYÚrequireÚ non_blockingcó^t«}tyt|t«ryt|¬«S)N)r)r%r+rvr#)rrÚopss rZÚget_cuda_streamr"|s-ô Ó
€CÜÐØÜ Ô "Øä 4rYcó”t|Stj|jd|j¬«}|j ||¬«|S)C)ÚorderÚdtype)Ústream)r,ÚndarrayÚshaper&Úset)r'Ú numpy_arrayÚarrays rZÚ get_asyncr-ˆs@Ü €|ØÐä ˜[×.°cÀ×ARÑARÔSˆØ
+ fˆ Ôˆ rYc ódt|«}|jd¬«5}|j«jd«}ddd«dj Dcgc]+}|j «sŒdt
j|«zŒ-c}«}t
j|«S#1swYŒdxYwcc}w)NrÑ)ú
ú^) r÷Úopenr]rBÚstripÚreÚescapeÚcompile)Úfile_ÚentriesÚpieceÚ
expressions rZÚ
read_regexr;Ü  €DØ
˜FˆÔ # uØ—*‘*“,×$ ÷
Ù-4ÓF©W E¸¿ ¹ ½
ˆŒry‰y˜ÓÓ ¨WÑ€Jô :‰: 
$Ð #üò Gsž B!ÁB-Á+B-Â!B*r8cóœdj|Dcgc]}|j«sŒd|zŒc}«}tj|«Scc}w)aCompile a sequence of prefix rules into a regex object.
entries (Iterable[Union[str, Pattern]]): The prefix rules, e.g.
spacy.lang.punctuation.TOKENIZER_PREFIXES.
RETURNS (Pattern): The regex object. to be used for Tokenizer.prefix_search.
r0r1©r3r4r6©r8r9r:s rZÚcompile_prefix_regexr?sBð±GÓM±G¨5¸u¿{¹{½}˜3 ;°GÑN€JÜ
:‰: !ùòNó
A ¦A cóœdj|Dcgc]}|j«sŒ|dzŒc}«}tj|«Scc}w)aCompile a sequence of suffix rules into a regex object.
entries (Iterable[Union[str, Pattern]]): The suffix rules, e.g.
spacy.lang.punctuation.TOKENIZER_SUFFIXES.
RETURNS (Pattern): The regex object. to be used for Tokenizer.suffix_search.
r0ú$r=r>s rZÚcompile_suffix_regexrC¦sBð±GÓM±G¨5¸u¿{¹{½}˜5 3;°GÑN€JÜ
:‰: !ùòNr@có–dj|Dcgc]}|j«sŒ|Œc}«}tj|«Scc}w)aCompile a sequence of infix rules into a regex object.
entries (Iterable[Union[str, Pattern]]): The infix rules, e.g.
spacy.lang.punctuation.TOKENIZER_INFIXES.
RETURNS (regex object): The regex object. to be used for Tokenizer.infix_finditer.
r0r=r>s rZÚcompile_infix_regexrE±s>ð©gÓG©g U¸¿¹½š5¨gÑH€JÜ
:‰: !ùòHs
A¦AÚ default_funccó8tjt||«S)aQExtend an attribute function with special cases. If a word is in the
lookups, the value is returned. Otherwise the previous function is used.
default_func (callable): The default function to execute.
*lookups (dict): Lookup dictionary mapping string to attribute value.
RETURNS (callable): Lexical attribute getter.
)Ú functoolsÚpartialÚ_get_attr_unless_lookup)rFrbs rZÚ add_lookupsrK¼sô × Ñ Ô4°lÀGÓ LrYrbÚstringcó6|D]
}||vsŒ||cS||«SrX)rFrbrLÚlookups rZrJrJÉs-óˆØ  ؘ& ñ ˜Ó ÐrYÚbase_exceptionscóœt|«}|D}|j«D]ˆ\}}td|D««s*ttj
j
||¬««djd|D««}||k7sŒ`ttjj
||¬««|j|«Œ°t|dd«}|S)a'Update and validate tokenizer exceptions. Will overwrite exceptions.
base_exceptions (Dict[str, List[dict]]): Base exceptions.
*addition_dicts (Dict[str, List[dict]]): Exceptions to add to the base dict, in order.
RETURNS (Dict[str, List[dict]]): Combined tokenizer exceptions.
c3óJK|]}t|tt«Œy­w)rvr3r ©rxÚattrs rZrzzupdate_exc.<locals>.<genexpr>Þsèø€ÐK¹{°t”z $¤t¡*¬c×2¹{ùs!#)Úorthsrc3ó.K|]
}|tŒy­wr2rRs rZrzzupdate_exc.<locals>.<genexpr>àsèø€Ð$H¹K°D T¬$¥Z¹Kùóú'u’) Údictr9Úallrÿr0ÚE055rŒÚE056rºÚ
expand_exc)rOÚaddition_dictsÚexcÚ additionsÚorthÚ token_attrsÚdescribed_orths rZÚ
update_excrcÒô ˆÓ
€CÛ#ˆ Ø!*§¡Ö!2Ñ ˆDÑK¹{Ó ¤§¡×!3Ñ!3¸ÀKÐ!3Ó!PÓŸW™WÑ$H¹KÓ$HÓHˆNØ ¤§¡×!3Ñ!3¸ÀNÐ!3Ó!SÓ "3ð
ð S˜#˜uÓ
%€CØ €JrYÚexcsÚsearchrc óÂd}t|«}|j«D]8\}}||vsŒ |j||«}|Dcgc] }||||«Œ} }| ||<Œ:|Scc}w)abFind string in tokenizer exceptions, duplicate entry and replace string.
For example, to add additional versions with typographic apostrophes.
excs (Dict[str, List[dict]]): Tokenizer exceptions.
search (str): String to find and replace.
replace (str): Replacement.
RETURNS (Dict[str, List[dict]]): Combined tokenizer exceptions.
có\t|«}|tj||«|t<|S)rXr3r)ÚtokenrerÚfixeds rZÚ
_fix_tokenzexpand_exc.<locals>._fix_tokenôs*ÜU“ ˆØœDk×)¨&°'Ó:ˆŒd‰ ؈ rY)rXr9r)
rdrerrjÚnew_excsÚ token_stringÚtokensÚnew_keyÚtÚ new_values
rZr\r\èsuòô
Dz€HØ $§
¡
¦ ш  "×*¨6°7Ó;ˆGÙAGÓ¸A™ A v¨wÕˆHØ )ˆH ð !-ð
€OùòIsÁAÚlengthÚstartÚstopÚstepcóÞ||dk(sttj«|d}n
|dkr||z
}t|t d|««}||}n
|dkr||z
}t|t ||««}||fS)Nr)r)rÿr0ÚE057ÚminÚmax)rqrrrsrts rZÚnormalize_slicerysð
ˆL˜D AšIÜœŸÓ €}ØØ ŠØ
ˆÜ œ˜A˜u
Ó &€EØ €|ØØ
ŠØ ˆÜ ˆv”s˜5 $Ó (€DØ $ˆrYÚspansr7có,d}t||d¬«}g}t«}|D]c}|j|vsŒ|jdz
|vsŒ$|j |«|j t
|j|j««Œet|d¬«}|S)a€Filter a sequence of spans and remove duplicates or overlaps. Useful for
creating named entities (where one token can only be part of one entity) or
when merging spans with `Retokenizer.merge`. When spans overlap, the (first)
longest span is preferred over shorter spans.
spans (Iterable[Span]): The spans to filter.
RETURNS (List[Span]): The filtered spans.
cóN|j|jz
|j fS)Úendrr©Úspans rZú<lambda>zfilter_spans.<locals>.<lambda>s §¡¨D¯J©JÑ!6¸¿¹¸ Ñ DrYT)r)có|jS)rrr~s rZr€zfilter_spans.<locals>.<lambda>&s¨T¯ZªZrY))rxr*rrr}rwÚrange)rzÚ get_sort_keyÚ sorted_spansrKÚ seen_tokensrs rZÚ filter_spansr†s†ñE€Lܘ% \¸4Ô@€LØ
€FÜ›E€KÛˆà :‰:˜[Ò (¨T¯X©X¸©\ÀÒ-LØ M‰M˜$Ô Ø × Ñ œu T§Z¡Z°·±Ó ô
FÑ 7Ô
8€FØ €MrYcó8ttj|Ž«S)r†Ú itertoolsÚchain)rzs rZÚfilter_chain_spansrŠ*sÜ œ Ÿ¨Ð 0rYz#spacy.first_longest_spans_filter.v1cótS)rXrYrZÚmake_first_longest_spans_filterrŒ.sä ÐrYÚgetterscó@tjt||««S)Ú
msgpack_dumpsÚto_dict)rrs rZÚto_bytesr3sÜ × Ñ œw w°Ó 9rYÚ
bytes_dataÚsetterscóBttj|«||«S)Ú from_dictrúÚ
msgpack_loads)rr“rs rZÚ
from_bytesr—7sô
”U×Ó4°g¸ GrYcó|i}|j«D]&\}}|jd«d|vsŒ|«||<Œ(|S©Nr>r©r9rB)rrÚ
serializedr²Úgetters rZrr?sEð€JØ—}‘}– ˆˆVà 9‰9S>˜   $hˆJsŠOð ÐrYr@có„|j«D],\}}|jd«d|vsŒ||vsŒ"|||«Œ.|Sr™)r@r“rÚsetters rZr•r•JsFð
—}‘}– ˆˆVà 9‰9S‹>˜   °s²
Ù 3s ð €JrYÚwriterscóÐt|«}|j«s|j«|j«D]'\}}|j d«d|vsŒ|||z «Œ)|Sr™)Úmkdirr9rB)rÚwriters rZÚto_diskr£Vs\ô
 €DØ ;‰;Œ=Ø
Œ Ø—}‘}– ˆˆVà 9‰9S>˜   4˜# ð €KrYrgcót|«}|j«D]'\}}|jd«d|vsŒ|||z «Œ)|Sr™)r9rB)rgrÚreaders rZr+r+esJô
 €DØ—}‘}– ˆˆVà 9‰9S>˜   4˜# ð €KrYr?cóÌtjj|t|««}tjj |«}|j
j
|«|S)z¶Import module from a file. Used to load models from a directory.
name (str): Name of module to load.
loc (str / Path): Path to the file.
RETURNS: The loaded module.
)ÚutilÚspec_from_file_locationr Úmodule_from_specÚloaderÚ exec_module)r{r?rns rZÚ import_filer¬rsIô >‰>× 1°$¼¸Ó A€DÜ
^‰^×
,¨TÓ
2€F؇Kј €MrYÚhtmlcób|j«jdd«jdd«S)zïPerform a template-specific, rudimentary HTML minification for displaCy.
Disclaimer: NOT a general-purpose solution, only removes indentation and
newlines.
html (str): Markup to minify.
RETURNS (str): "Minified" HTML.
z rr/)r3r)r­s rZÚ minify_htmlr¯s*ð :‰:‹<× Ñ  ¨Ó +× 3°D¸ =rYcó–|jdd«}|jdd«}|jdd«}|jdd«}|S) zæReplace <, >, &, " with their HTML encoded representation. Intended to
prevent HTML errors in rendered displaCy markup.
text (str): The original text.
RETURNS (str): Equivalent text to be safely used within HTML.
ú&z&amp;r}z&lt;rrz&gt;ú"z&quot;)r)r`s rZÚ escape_htmlr³ŠsIð <‰<˜˜ %€DØ <‰<˜˜ $€DØ <‰<˜˜ $€DØ <‰<˜˜ &€DØ €KrYÚwordscófdjdj|«j««dj|j««k7r*ttjj ||¬««g}g}d}|Dcgc]}|j
«rŒ|Œ}}|D]} ||dj|«}|dkDr-|j||||z«|jd«||z
}|j|«|jd«|t|«z
}|t|«ksŒ‰||dk(sŒ’d|d<|d z
}Œ|t|«kr%|j||d«|jd«||fScc}w#t$r,ttjj ||¬««dwxYw)
a†Given a list of words and a text, reconstruct the original tokens and
return a list of words and spaces that can be used to create a Doc. This
can help recover destructive tokenization that didn't preserve any
whitespace information.
words (Iterable[str]): The words.
text (str): The original text.
RETURNS (Tuple[List[str], List[bool]]): The words and spaces.
r)r`r´rNFrÐTéÿÿÿÿr))
rBrÿr0ÚE194rŒÚisspacerÂrwr€)r´r`Ú
text_wordsÚ text_spacesÚtext_posÚwordÚ
norm_wordsÚ
word_starts rZÚget_words_and_spacesr¿˜ð
‡wwˆrw‰wu~×&¨"¯'©'°$·*±*³,Ó*?ÒœŸ×¸UЀJØ€KØ€Há#(Ó?¡5˜4°· ± µ’$ 5€JЈð Sؘh˜i˜×.¨tÓ4ˆJ𠘊>Ø × Ñ ˜d 8¨h¸Ñ.CÐ × Ñ ˜uÔ ˜
Ñ "ˆHØ×ј$ÔØ×ј5Ô”C˜“IÑˆØ ”c˜$“iÓ  ¡N°cÓ$9Ø"ˆ‰OØ ˜‰M‰Hðð”#d“)ÒØ×ј$˜x˜y˜/Ô×ј5Ô ˜ Ð $ùò)@øô
ò SÜœVŸ[™[×/°TÀÐHÈdÐ  SúsÂE6ÂE6Â%E;Å;5F0có¤ t|«j«S#t$r+ttjj |¬««dwxYw)zµDeep copy a Config. Will raise an error if the config contents are not
JSON-serializable.
config (Config): The config to copy.
RETURNS (Config): The copied config.
r0N)r rÿr0ÚE961rŒr0s rZÚ copy_configrÂÁsKðFÜf~×$øÜ òFÜœŸ×+°6ÐÐFús 4AÚvaluescóði}|j«D]`\}}|}|j«jd«}t|«D],\}}|t |«dz
k(}|j ||r|ni«}Œ.Œb|S)aConvert dot notation to a dict. For example: {"token.pos": True,
"token._.xyz": True} becomes {"token": {"pos": True, "_": {"xyz": True }}}.
values (Dict[str, Any]): The key/value pairs to convert.
RETURNS (Dict[str, dict]): The converted values.
r>r))r9ÚlowerrBÚ enumerater€Ú
setdefault) rÃrKr|ÚpartsÚitemÚis_lasts rZÚ dot_to_dictrÌÎsxð!€FØ—l‘l–n‰
ˆˆU؈ؗ ‘ “ ×! #Ó&ˆÜ  Ö'‰GˆAˆœ3˜u™>Ñ)ˆ—?? 4±'©¸B‰Dñ €MrYr#r$cónt||¬«Dcic]\}}dj|«|Œc}}Scc}}w)aeConvert dot notation to a dict. For example: {"token": {"pos": True,
"_": {"xyz": True }}} becomes {"token.pos": True, "token._.xyz": True}.
obj (Dict[str, dict]): The dict to convert.
for_overrides (bool): Whether to enable special handling for registered
functions in overrides.
RETURNS (Dict[str, Any]): The key/value pairs.
r#r>)Ú walk_dictrŠ)rr$r|s rZr(r(ßsAô$ C°}Õ áE‰JˆCð 
Ø ðùó s1rJcóÀ|}|jd«}|D]} ||}Œ
|S#ttf$r+ttjj |¬««dwxYw)a`Convert dot notation of a "section" to a specific part of the Config.
e.g. "training.optimizer" would return the Optimizer object.
Throws an error if the section is not defined in this config.
config (Config): The config.
section (str): The dot notation of the section in the config.
RETURNS: The object denoted by the section
r>rN)rBrFr0ÚE952rŒ)rrJÚ componentrÈs rZrErEîsoð€IØ M‰M˜#Ó €EÛˆð GØ! $™‰Iðð
Ðøôœ)Ð GÜœ6Ÿ;™;×-°7Ð<À$Ð  Gús š#£:Ar|có|}|jd«}t|«D]"\}} |t|«dz
k(r|||<n||}Œ$y#ttf$r+tt
j j|¬««dwxYw)zÒUpdate a config at a given position from a dot notation.
config (Config): The config.
section (str): The dot notation of the section in the config.