Is there a method around that tests whether 2 URLs are equal, i.e., point to the same place?
I am not talking about 2 URLs with different domain names pointing to the same IP address but for example 2 URLs that point to the same .aspx page:
is equal to these:
- http://example.com/Products/Default.aspx
- http://example.com/Products/
- ~/Products/Default.aspx
- ~/Products/
Notes/assumptions
- QueryString Values are Ignored
- ASP.NET (Pref C#)
- Default.aspx is the default page
—-UPDATE—-
This is a very crude method that tests a URL to see if matches the current URL:
I tried creating a new Uri() with both the local and check URLs, but I don't know whether that works, so I went down the string-checking avenue.
The implementation of the SiteMapProvider skips this step if the URL starts with “Http”, as this assumes an external URL. Since I have a SaaS framework that will always ensure relative paths (as these can be on different subdomains), it is easier to strip things down.
Any comments on optimization? I guess for a start we can pass in a variable containing the current URL? Not sure of the overhead of calling HttpContext.Current.Request.Url.LocalPath many times?
/// <summary>
/// Tests whether <paramref name="url"/> refers to the same page as the current request.
/// Query strings and fragments are ignored, a folder path is treated as that folder's
/// "Default.aspx", and the comparison is case-insensitive.
/// </summary>
/// <param name="url">
/// The URL to test. Intended for app-relative ("~/Products/") or root-relative
/// ("/Products/Default.aspx") paths; for an absolute URL only the path part is compared.
/// </param>
/// <returns><c>true</c> when both URLs normalize to the same path; otherwise <c>false</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="url"/> is <c>null</c>.</exception>
public static bool CurrentURLMatch(string url)
{
    if (url == null)
    {
        throw new ArgumentNullException("url");
    }

    // Read the request URL once instead of going through HttpContext repeatedly.
    Uri requestUrl = HttpContext.Current.Request.Url;
    string localURL = requestUrl.LocalPath;

    if (requestUrl.Host == "localhost" && localURL.Length > 1)
    {
        // On localhost the first path segment is dropped (it is treated as the
        // application's virtual directory). When the path has only one segment
        // there is nothing to drop, so it is left untouched instead of throwing.
        int secondSlash = localURL.IndexOf('/', 1);
        if (secondSlash >= 0)
        {
            localURL = localURL.Substring(secondSlash);
        }
    }

    return String.Equals(
        NormalizePathForComparison(localURL),
        NormalizePathForComparison(url),
        StringComparison.OrdinalIgnoreCase);
}

/// <summary>
/// Reduces a URL or path to a comparable form: no query string or fragment, no
/// scheme/host, no leading "~", and an explicit "Default.aspx" for folder paths.
/// </summary>
private static string NormalizePathForComparison(string path)
{
    // Remove the query string and/or fragment, whichever starts first.
    int cut = path.IndexOfAny(new char[] { '?', '#' });
    if (cut >= 0)
    {
        path = path.Substring(0, cut);
    }

    // For "scheme://host/..." or "//host/..." keep only the path, including its
    // leading slash so it lines up with the request's local path.
    int authorityStart = -1;
    int schemeSeparator = path.IndexOf("://", StringComparison.Ordinal);
    if (schemeSeparator >= 0)
    {
        authorityStart = schemeSeparator + 3;
    }
    else if (path.StartsWith("//", StringComparison.Ordinal))
    {
        authorityStart = 2;
    }

    if (authorityStart >= 0)
    {
        int pathStart = path.IndexOf('/', authorityStart);
        path = pathStart >= 0 ? path.Substring(pathStart) : "/";
    }

    // "~/Products/" -> "/Products/"
    if (path.StartsWith("~", StringComparison.Ordinal))
    {
        path = path.Substring(1);
    }

    // Anything that does not mention "aspx" is treated as a folder.
    bool looksLikePage = path.IndexOf("aspx", StringComparison.OrdinalIgnoreCase) >= 0;
    if (!looksLikePage && !path.EndsWith("/", StringComparison.Ordinal))
    {
        path = String.Concat(path, "/");
    }

    // A folder resolves to its default page.
    if (path.EndsWith("/", StringComparison.Ordinal))
    {
        path = String.Concat(path, "Default.aspx");
    }

    return path;
}
Answers:
Thank you for visiting the Q&A section on Magenaut. Please note that all the answers may not help you solve the issue immediately. So please treat them as advisements. If you found the post helpful (or not), leave a comment & I’ll get back to you as soon as possible.
Method 1
for the record, here is the translation of http://en.wikipedia.org/wiki/URL%5Fnormalization to C#:
using System;
using System.Web;
namespace UrlNormalizationTest
{
    /// <summary>
    /// Extension methods that reduce URLs to a canonical string so that two URLs can be
    /// compared for equivalence. The steps follow the techniques listed in the Wikipedia
    /// "URL normalization" article; several of them do not preserve semantics in general
    /// (https is folded into http, "www." is added to two-label hosts, directory index
    /// file names are dropped).
    /// </summary>
    public static class UrlNormalization
    {
        /// <summary>
        /// File names (lower case) treated as a directory's default document and removed
        /// when they are the last path segment of a URL.
        /// </summary>
        public static string[] DefaultDirectoryIndexes = new[]
        {
            "default.asp",
            "default.aspx",
            "index.htm",
            "index.html",
            "index.php"
        };

        /// <summary>Returns true when both absolute URL strings have the same normalized form.</summary>
        /// <param name="url1">First absolute URL.</param>
        /// <param name="url2">Second absolute URL.</param>
        public static bool AreTheSameUrls(this string url1, string url2)
        {
            return string.Equals(url1.NormalizeUrl(), url2.NormalizeUrl(), StringComparison.Ordinal);
        }

        /// <summary>Returns true when both URIs have the same normalized form.</summary>
        /// <param name="uri1">First URI.</param>
        /// <param name="uri2">Second URI.</param>
        public static bool AreTheSameUrls(this Uri uri1, Uri uri2)
        {
            return string.Equals(uri1.NormalizeUrl(), uri2.NormalizeUrl(), StringComparison.Ordinal);
        }

        /// <summary>
        /// Produces the normalized string form of <paramref name="uri"/>.
        /// The order of the steps matters: each one works on the output of the previous.
        /// </summary>
        /// <param name="uri">The URI to normalize.</param>
        /// <returns>The lower-cased, decoded and simplified URL.</returns>
        public static string NormalizeUrl(this Uri uri)
        {
            var url = UrlToLower(uri);
            url = LimitProtocols(url);
            url = RemoveDefaultDirectoryIndexes(url);
            url = RemoveTheFragment(url);
            url = RemoveDuplicateSlashes(url);
            url = AddWww(url);
            url = RemoveFeedburnerPart(url);
            return RemoveTrailingSlashAndEmptyQuery(url);
        }

        /// <summary>Parses <paramref name="url"/> as a URI and normalizes it.</summary>
        /// <param name="url">An absolute URL string accepted by the <see cref="Uri"/> constructor.</param>
        public static string NormalizeUrl(this string url)
        {
            return NormalizeUrl(new Uri(url));
        }

        // Cuts the URL just before the "utm_source=" parameter, which also removes the
        // '?' or '&' that precedes it.
        private static string RemoveFeedburnerPart(string url)
        {
            var idx = url.IndexOf("utm_source=", StringComparison.Ordinal);
            return idx <= 0 ? url : url.Substring(0, idx - 1);
        }

        // Prefixes a two-label host (e.g. "example.com") with "www.".
        private static string AddWww(string url)
        {
            var host = new Uri(url).Host;
            if (host.Split('.').Length != 2 || host.StartsWith("www.", StringComparison.Ordinal))
            {
                return url;
            }

            // Insert after the first "://" only; a blanket Replace would also rewrite
            // URLs embedded in the query string.
            var schemeEnd = url.IndexOf("://", StringComparison.Ordinal);
            return schemeEnd < 0 ? url : url.Insert(schemeEnd + 3, "www.");
        }

        // Collapses "//" inside the path portion of the URL.
        private static string RemoveDuplicateSlashes(string url)
        {
            var path = new Uri(url).AbsolutePath;
            return path.Contains("//") ? url.Replace(path, path.Replace("//", "/")) : url;
        }

        // Treats https as http. Only the leading scheme is rewritten; the URL has
        // already been lower-cased by the time this runs.
        private static string LimitProtocols(string url)
        {
            const string secureScheme = "https://";
            return url.StartsWith(secureScheme, StringComparison.Ordinal)
                ? "http://" + url.Substring(secureScheme.Length)
                : url;
        }

        // Drops the "#fragment" part, if any.
        private static string RemoveTheFragment(string url)
        {
            var fragment = new Uri(url).Fragment;
            return string.IsNullOrWhiteSpace(fragment) ? url : url.Replace(fragment, string.Empty);
        }

        // Lower-cases the absolute URI and decodes percent-escapes.
        private static string UrlToLower(Uri uri)
        {
            return HttpUtility.UrlDecode(uri.AbsoluteUri.ToLowerInvariant());
        }

        // Strips trailing '?' characters, then trailing '/' characters.
        private static string RemoveTrailingSlashAndEmptyQuery(string url)
        {
            return url
                .TrimEnd(new[] { '?' })
                .TrimEnd(new[] { '/' });
        }

        // Removes a default-document file name when it is the whole last path segment.
        // TrimEnd(char[]) must not be used for this: it strips any trailing run of the
        // given characters, so ".../myindex.html" would be cut down to ".../my".
        private static string RemoveDefaultDirectoryIndexes(string url)
        {
            foreach (var index in DefaultDirectoryIndexes)
            {
                if (url.EndsWith("/" + index, StringComparison.Ordinal))
                {
                    // Keep the slash; it is trimmed by the final normalization step.
                    return url.Substring(0, url.Length - index.Length);
                }
            }
            return url;
        }
    }
}
With the following tests:
using NUnit.Framework;
using UrlNormalizationTest;
namespace UrlNormalization.Tests
{
    /// <summary>
    /// Checks each normalization technique by asserting that two differently written
    /// URLs normalize to the same string.
    /// </summary>
    [TestFixture]
    public class UnitTests
    {
        // Normalizes both URLs and asserts that the results are identical.
        private static void AssertSameNormalizedUrl(string first, string second)
        {
            var url1 = first.NormalizeUrl();
            var url2 = second.NormalizeUrl();
            Assert.AreEqual(url1, url2);
        }

        [Test]
        public void Test1ConvertingTheSchemeAndHostToLowercase()
        {
            AssertSameNormalizedUrl("HTTP://www.Example.com/", "http://www.example.com/");
        }

        [Test]
        public void Test2CapitalizingLettersInEscapeSequences()
        {
            AssertSameNormalizedUrl("http://www.example.com/a%c2%b1b", "http://www.example.com/a%C2%B1b");
        }

        [Test]
        public void Test3DecodingPercentEncodedOctetsOfUnreservedCharacters()
        {
            AssertSameNormalizedUrl("http://www.example.com/%7Eusername/", "http://www.example.com/~username/");
        }

        [Test]
        public void Test4RemovingTheDefaultPort()
        {
            AssertSameNormalizedUrl("http://www.example.com:80/bar.html", "http://www.example.com/bar.html");
        }

        [Test]
        public void Test5AddingTrailing()
        {
            AssertSameNormalizedUrl("http://www.example.com/alice", "http://www.example.com/alice/?");
        }

        [Test]
        public void Test6RemovingDotSegments()
        {
            AssertSameNormalizedUrl("http://www.example.com/../a/b/../c/./d.html", "http://www.example.com/a/c/d.html");
        }

        [Test]
        public void Test7RemovingDirectoryIndex1()
        {
            AssertSameNormalizedUrl("http://www.example.com/default.asp", "http://www.example.com/");
        }

        [Test]
        public void Test7RemovingDirectoryIndex2()
        {
            // A directory index followed by a query string is kept as written. The
            // expected value is a literal: comparing the URL with itself could never fail.
            var normalized = "http://www.example.com/default.asp?id=1".NormalizeUrl();
            Assert.AreEqual("http://www.example.com/default.asp?id=1", normalized);
        }

        [Test]
        public void Test7RemovingDirectoryIndex3()
        {
            AssertSameNormalizedUrl("http://www.example.com/a/index.html", "http://www.example.com/a/");
        }

        [Test]
        public void Test8RemovingTheFragment()
        {
            AssertSameNormalizedUrl("http://www.example.com/bar.html#section1", "http://www.example.com/bar.html");
        }

        [Test]
        public void Test9LimitingProtocols()
        {
            AssertSameNormalizedUrl("https://www.example.com/", "http://www.example.com/");
        }

        [Test]
        public void Test10RemovingDuplicateSlashes()
        {
            AssertSameNormalizedUrl("http://www.example.com/foo//bar.html", "http://www.example.com/foo/bar.html");
        }

        [Test]
        public void Test11AddWww()
        {
            AssertSameNormalizedUrl("http://example.com/", "http://www.example.com");
        }

        [Test]
        public void Test12RemoveFeedburnerPart()
        {
            AssertSameNormalizedUrl(
                "http://site.net/2013/02/firefox-19-released/?utm_source=rss&utm_medium=rss&utm_campaign=firefox-19-released",
                "http://site.net/2013/02/firefox-19-released");
        }
    }
}
Method 2
You might be looking for URL normalization techniques. They might be a good starting point 🙂
Once you have normalized the URLs, you simply need to check if they are equal (keep in mind your assumptions, for instance, you discard the querystring).
Method 3
You could probably use the Uri class to check individual parts of the urls, after converting each to the right format.
// Wrap each URL string in a Uri so its parts can be inspected individually.
// TODO: pick the constructor overloads that fit, or pre-process the strings
// so that the different input formats are accommodated.
var uri1 = new Uri(url1);
var uri2 = new Uri(url2);

// Another overload takes the kind of URI explicitly.
var uri3 = new Uri(url3, UriKind.Absolute);

// Compare whichever parts are relevant.
// TODO: use the right properties...
if (string.Equals(uri1.AbsolutePath, uri2.AbsolutePath))
{
    // Urls match
}
Method 4
Maybe this tutorial can be of help to you?
“…You want to see how to handle
identical Urls in the sitemap (which
is forbidden by the out-of-the-box
SiteMapProvider)…”
/// <summary>
/// The default provider does not accept the same Url twice in a SiteMap data source.
/// This walks every row of every table in the data source; the first occurrence of a
/// Url is left alone, and each later occurrence gets an "instance=N" query parameter
/// appended so that the stored value becomes distinct.
/// </summary>
private void modifyDuplicateUrls()
{
    StringCollection seenUrls = new StringCollection();

    // One counter is shared by all duplicates, whatever Url they repeat.
    uint duplicatesFound = 0;

    foreach (DataTable table in this.DataSource.Tables)
    {
        foreach (DataRow row in table.Rows)
        {
            string currentUrl = (string)row["Url"];

            if (!seenUrls.Contains(currentUrl))
            {
                seenUrls.Add(currentUrl);
                continue;
            }

            duplicatesFound++;

            // Extend an existing query string with '&', otherwise start one with '?'.
            string prefix = currentUrl.Contains("?") ? "&instance=" : "?instance=";
            row["Url"] = currentUrl + prefix + duplicatesFound.ToString();
        }
    }
}
}
Method 5
What about seeing if Server.MapPath is equal for both urls? (assuming this is an ASP.NET application, not ASP.NET MVC)
// Two URLs are considered equal when they map to the same physical path.
// The comparison is ordinal and case-insensitive: lower-casing with ToLower()
// depends on the current culture and can give wrong answers (e.g. Turkish 'I').
return string.Equals(
    Server.MapPath(url1),
    Server.MapPath(url2),
    System.StringComparison.OrdinalIgnoreCase);
Method 6
frankly, just load the URLs and compare their html contents?
All methods were sourced from stackoverflow.com or stackexchange.com and are licensed under cc by-sa 2.5, cc by-sa 3.0 and cc by-sa 4.0