Skip to content

Commit b301736

Browse files
committed
Resolve redirects when testing URLs for equality
The URLs that DOIs resolve to can move around, with redirects pointing to the new location. To make the tests more robust, only fail if the URLs still differ after redirects have been followed. See also https://www.crossref.org/blog/urls-and-dois-a-complicated-relationship/
1 parent 0518560 commit b301736

File tree

1 file changed

+24
-2
lines changed

1 file changed

+24
-2
lines changed

tests/test_doi.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,35 @@
33
import os
44
from pkg_resources import parse_version
55

6+
from urllib.error import HTTPError
7+
from urllib.request import Request, urlopen
8+
from urllib.parse import urlparse, urlunparse
9+
610
import pytest
711

812
from doi import (
913
validate_doi, find_doi_in_text, __version__, pdf_to_doi,
1014
get_real_url_from_doi
1115
)
1216

17+
def simplify_url(u):
    """Parse *u* and return its components with query and fragment blanked.

    The result is the ``urlparse`` result object (not a string), with the
    ``query`` and ``fragment`` fields replaced by empty strings; all other
    components are left exactly as parsed.
    """
    parsed = urlparse(u)
    stripped = parsed._replace(query='', fragment='')
    return stripped
19+
20+
def resolve_redirects(u, timeout=30):
    """Return the simplified URL that *u* ends up at after redirects.

    The scheme of *u* is forced to ``https`` before the request is made and
    a browser-style ``User-Agent`` header is sent. The final URL reported by
    the response is passed through ``simplify_url``.

    Args:
        u: URL to resolve.
        timeout: Seconds to wait on the network before giving up. Without a
            timeout a stalled server would block the test run indefinitely.

    Returns:
        The simplified final URL, or ``None`` when the server answers with
        an HTTP error status (the error is printed).

    Raises:
        Any non-``HTTPError`` failure raised by ``urlopen`` (for example a
        connection failure or a timeout) propagates to the caller.
    """
    https_url = urlunparse(urlparse(u)._replace(scheme='https'))
    req = Request(
        https_url,
        headers={'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64)'},
    )
    try:
        with urlopen(req, timeout=timeout) as r:
            return simplify_url(r.url)
    except HTTPError as e:
        print(e)
        # Explicit: callers must treat None as "could not be resolved".
        return None
28+
29+
def normalize_eq(u, v):
    """Return True if URLs *u* and *v* should be considered the same.

    Checks are tried from cheapest to most expensive:

    1. the two values are equal as given;
    2. they are equal once passed through ``simplify_url``;
    3. they are equal once passed through ``resolve_redirects``.

    ``resolve_redirects`` returns ``None`` when the request ends in an HTTP
    error. A failed resolution is never treated as a match; otherwise two
    unrelated URLs that both fail would compare equal (``None == None``).
    """
    if u == v:
        return True
    if simplify_url(u) == simplify_url(v):
        return True

    resolved_u = resolve_redirects(u)
    if resolved_u is None:
        # Could not resolve u, so equality cannot be established.
        return False
    # If v fails to resolve this compares a result against None -> False.
    return resolved_u == resolve_redirects(v)
34+
1335

1436
def test_valid_version() -> None:
1537
"""Check that the package defines a valid __version__"""
@@ -32,7 +54,7 @@ def test_validate_doi() -> None:
3254
'https://linkinghub.elsevier.com/retrieve/pii/S0009261497040141'),
3355
]
3456
for doi, url in data:
35-
assert url == validate_doi(doi)
57+
assert normalize_eq(url, validate_doi(doi))
3658

3759
for doi in ['', 'asdf']:
3860
try:
@@ -49,7 +71,7 @@ def test_get_real_url_from_doi() -> None:
4971
'article/abs/pii/S0009261497040141'),
5072
]
5173
for doi, url in data:
52-
assert url == get_real_url_from_doi(doi)
74+
assert normalize_eq(url, get_real_url_from_doi(doi))
5375

5476

5577
def test_find_doi_in_line() -> None:

0 commit comments

Comments
 (0)