如何使用C ++代码检查指定的字符串是否是有效的URL [英] How to check a specified string is a valid URL or not using C++ code

查看:139
本文介绍了如何使用C ++代码检查指定的字符串是否是有效的URL的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

有任何可能的方法来检查指定的字符串是否是有效的URL。解决方案必须是c ++,它应该没有互联网。



示例字符串



good.morning

foo.goo.koo

https://hhhh

hdajdklbcbdhd

8881424.www.hfbn55.co.in/sdfsnhjk

://dgdh24.vom

dfgdfgdf(2001)/.com/sdgsgh

\adiihsdfghnhg.co.inskdhhj

aser//www.gtyuh.co.uk/kdsfgdfgfrgj



我的尝试:


// Precompiled header and Win32 declarations for the C++/CLI console program.
#include "stdafx.h"
#include <windows.h>

using namespace System;
using namespace System::IO;

// Number of entries stored in the dname table so far.
int iDomCount = 0;

void dominit();

void main(int argc,_TCHAR * argv [])

{



CString Uri,Temp,strDname;

int iLoc,iAsc,iLen;

char cStr;

尝试

{

cout<<输入Url \ n;

Uri = Console :: ReadLine();

Temp = Uri;

if((Uri.Find(Lhttps,0))> = 0)

Uri = Uri.Mid(8);

else if((Uri.Find(Lhttp,0))> = 0)

Uri = Uri.Mid( 7);

if((Uri.Find(Lwww。,0))> = 0)

Uri = Uri.Mid(4);

for(int len = 0; len< Uri.GetLength(); len ++)

{

iAsc = Uri.GetAt(len) ;

if(((iAsc> 64)&&(iAsc< 91))||((iAsc> 96)&&(iAsc< 123))| |((iAsc> 47)&&(iAsc< 58))||(iAsc == 46)||(iAsc == 45))

iLoc ++;

else

休息;

}

if(iLoc< 1)

{

cout<<无效的网址;

系统(暂停);

Uri =;

控制台::清除();

}

其他

{

Uri = Uri.Mid(0 ,(iLoc));

int ifound = Uri.ReverseFind(L'。');

if(ifound< 0)

{

cout<<无效的网址;

系统(暂停);

Uri =;

控制台:: Clear();

}

else

{

strDname = Uri.Mid(ifound);





}

}



}

catch(...)

{

}

}

void dominit()

{

StreamReader ^ sr = gcnew StreamReader(dnmout.txt);

String ^ line;



//从文件中读取并显示行,直到

//文件结束为止。

while (line = sr-> ReadLine())

{

CString str3(line);

char * sz;

sprintf(sz,%S,str3);

dname [iDomCount] = sz;

iDomCount ++;

}

}





//这个代码我试过的。但它只适用于预定义的子域列表,我也尝试过使用c ++的REGEX,但它不适用于所有类型的url。请为它解决任何问题。

解决方案

你的一些示例字符串不是有效的URL(参见统一资源定位器 - 维基百科,免费的百科全书 [ ^ ]或有效的URI(统一资源标识符 - 维基百科,免费的百科全书 [ ^ ])。因此,您必须首先定义允许/支持的内容。



例如,可以规定缺少的方案(scheme)由默认方案替换(就像浏览器默认使用 http 那样),或者将不带冒号的方案视为 Windows 共享的服务器名称。



然后将输入拆分为零件,并使用零件特定规则检查每个零件。



请注意,某些部件可能有不同的规则,具体取决于其他部分。一个例子是Windows共享(由服务器名称表示为不带冒号的方案),其中路径和文件名部分中不允许使用特定字符,而URL中允许使用这些字符(例如引号和星号)。


请参阅此处:验证 - 哪些字符使URL无效? - Stack Overflow [ ^ ]

最好的起点是 IsValidURL函数(Windows) [ ^ ]或 PathIsURL函数(Windows) [ ^ ]



  #include   <   iostream  >  
#include < windows.h >
#include < tchar.h >
# include < urlmon.h >
#pragma comment(lib,urlmon.lib)
#pragma comment(lib,wininet.lib)

使用 命名空间标准;

// Passes Url to IsValidURL, prints a message describing the returned HRESULT,
// then prints the raw HRESULT value in hexadecimal.
void testURL(LPCTSTR Url)
{
	const HRESULT hr = IsValidURL(NULL, Url, 0);

	switch (hr)
	{
	case S_OK:
		cout << "The szURL parameter contains a valid URL.\n";
		break;
	case S_FALSE:
		cout << "The szURL parameter does not contain a valid URL.\n";
		break;
	case E_INVALIDARG:
		cout << "One of the parameters is invalid.\n";
		break;
	default:
		cout << "Unknown error\n";
		break;
	}

	// HRESULT is a long in the Windows SDK; cast it so the argument matches
	// the %lx conversion.
	printf("%lx", static_cast<unsigned long>(hr));
}

int main(){
LPCTSTR Url = _T( http://www.codeproject.com/Questions/1114838/How-to-check-a-specified-string-is-a-valid-URL-或);

testURL(Url);

return 0 ;
}





可以针对此在线验证器检查结果:验证URL地址 - FormValidation [ ^ ]


尝试RegEx(正则表达式)。

您会在Google上找到一些符合以下网址的RegEx:

^((http[s]?|ftp):\/)?\/?([^:\/\s]+)((\/\w+)*\/)([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?

Is there any possible way to check whether a specified string is a valid URL or not? The solution must be in C++ and it should work without internet access.

example strings are

good.morning
foo.goo.koo
https://hhhh
hdajdklbcbdhd
8881424.www.hfbn55.co.in/sdfsnhjk
://dgdh24.vom
dfgdfgdf(2001)/.com/sdgsgh
\adiihsdfghnhg.co.inskdhhj
aser//www.gtyuh.co.uk/kdsfgdfgfrgj

What I have tried:

#include "stdafx.h"
#include <windows.h>
using namespace System;
using namespace System::IO;
int iDomCount =0;
void dominit();
void main(int argc, _TCHAR* argv[])
{

CString Uri,Temp,strDname;
int iLoc,iAsc,iLen;
char cStr;
try
{
cout<<"Enter Url\n";
Uri=Console::ReadLine();
Temp=Uri;
if((Uri.Find(L"https",0)) >= 0)
Uri=Uri.Mid(8);
else if((Uri.Find(L"http",0)) >= 0)
Uri=Uri.Mid(7);
if((Uri.Find(L"www.",0)) >= 0)
Uri=Uri.Mid(4);
for (int len=0;len < Uri.GetLength();len++)
{
iAsc=Uri.GetAt(len);
if ( ((iAsc > 64) && (iAsc < 91)) || ((iAsc > 96) && (iAsc < 123)) || ((iAsc > 47) && (iAsc < 58)) || (iAsc == 46) || (iAsc == 45))
iLoc++;
else
break;
}
if (iLoc < 1)
{
cout<<"Invalid Url";
system("pause");
Uri="";
Console::Clear();
}
else
{
Uri=Uri.Mid(0,(iLoc));
int ifound=Uri.ReverseFind(L'.');
if (ifound < 0)
{
cout<<"Invalid Url";
system("pause");
Uri="";
Console::Clear();
}
else
{
strDname=Uri.Mid(ifound);


}
}

}
catch(...)
{
}
}
void dominit()
{
StreamReader^ sr = gcnew StreamReader( "dnmout.txt" );
String^ line;

// Read and display lines from the file until the end of
// the file is reached.
while ( line = sr->ReadLine() )
{
CString str3(line);
char *sz;
sprintf(sz, "%S", str3);
dname[iDomCount]=sz;
iDomCount ++;
}
}


// This is the code I tried, but it only works with a predefined list of subdomains. I've also tried regular expressions in C++, but they do not work with all types of URL. Is there any solution for this?

解决方案

Some of your example strings are not valid URLs (see Uniform Resource Locator - Wikipedia, the free encyclopedia[^]) or valid URIs (Uniform Resource Identifier - Wikipedia, the free encyclopedia[^]). So you have to define first what is allowed / to be supported.

For example, you might decide that a missing scheme is replaced by a default one (as any browser does, using http by default), or that a scheme without a colon is treated as the server name of a Windows share.

Then split the input into parts and check each part using the part specific rules.

Note that there may be different rules for some parts depending on other parts. An example would be Windows shares (indicated by the server name as scheme without colon) where specific characters would not be allowed in path and file name parts while these characters are allowed in URLs (e.g. quotation mark and asterisk).


See here: validation - Which characters make a URL invalid? - Stack Overflow[^]
The best place to begin is IsValidURL function (Windows)[^] or PathIsURL function (Windows)[^]

#include<iostream>
#include<windows.h>
#include <tchar.h>
#include <urlmon.h>
#pragma comment(lib, "urlmon.lib")
#pragma comment(lib,"wininet.lib")

using namespace std;

// Passes Url to IsValidURL, prints a message describing the returned HRESULT,
// then prints the raw HRESULT value in hexadecimal.
void testURL(LPCTSTR Url)
{
	const HRESULT hr = IsValidURL(NULL, Url, 0);

	switch (hr)
	{
	case S_OK:
		cout << "The szURL parameter contains a valid URL.\n";
		break;
	case S_FALSE:
		cout << "The szURL parameter does not contain a valid URL.\n";
		break;
	case E_INVALIDARG:
		cout << "One of the parameters is invalid.\n";
		break;
	default:
		cout << "Unknown error\n";
		break;
	}

	// HRESULT is a long in the Windows SDK; cast it so the argument matches
	// the %lx conversion.
	printf("%lx", static_cast<unsigned long>(hr));
}

int main() {
	LPCTSTR Url = _T("http://www.codeproject.com/Questions/1114838/How-to-check-a-specified-string-is-a-valid-URL-or");

	testURL(Url);

	return 0;
}



The results may be checked against this online validator: Validate an URL address - FormValidation[^]


Try RegEx (Regular Expressions).
A Google search will turn up regular expressions that match a URL, such as:

^((http[s]?|ftp):\/)?\/?([^:\/\s]+)((\/\w+)*\/)([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?


这篇关于如何使用C ++代码检查指定的字符串是否是有效的URL的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆