|
[ milan90 @ 26.06.2008. 17:53 ] @
| Potrebna mi je pomoc kako da napravim program da mi nakon unesenog url-a snima txt fajl sa svim linkovima koji se nalaze na toj stranici.
Imao sam prije program koji radi nesto slicno, ali ne mogu se nikako sjetiti kako se zove. Prvenstveno trazim pomoc oko koda, ali i ako zna neko da ima slican program takodje bi dobro doslo.  |
[ Aleksandar Ružičić @ 26.06.2008. 20:18 ] @
za to postoje dva nacina: prvi (bolji ali mnog mnogo tezi i komplikovaniji) je da napises HTML parser i da iscupas sve <a> elemente, drugi (nesto jednostavniji) je da "iscupas" href atribut svakog <a> elementa pomocu regularnih izraza, recimo:
Code:
Sub ExtractUrls(source As String)
Dim oRegExp As RegExp
Dim oMatches As MatchCollection
Dim oMatch As Match
Set oRegExp = New RegExp
oRegEXp.Pattern = "<a href=""([^""]+)"""
oRegExp.Global = TRue
oRegExp.IgnoreCase = True
Set oMatches = oRegExp.Execute(source)
For Each oMatch in oMatches
Debug.Print oMatch.Value
Next
End Sub
sto se tice regularnih izraza pogeldaj ovo: http://www.elitesecurity.org/t300319-0#1860144
[ Shadowed @ 26.06.2008. 20:33 ] @
Ako ta html strana postuje xhtml standarde mozes proci kroz nju i nekim xml parserom, mislim da ih i u shestici ima.
[ Aleksandar Ružičić @ 26.06.2008. 21:31 ] @
ima ih ali je mnogo jednostavnije koristiti regularne izraze upravo zbog toga sto veliki broj sajtova nije xhtml (ima nemali broj i onih koji imaju xhtml doctype a opet ne postuju xhtml standarde), mada ni ovaj regexp nije idealan (recimo ukoliko je neko naveo neki drugi atribut pre href onda ga ovaj regexp nece pokupiti, mada to moze da se sredi malo komplikovanijim izrazom...)
[ milan90 @ 27.06.2008. 13:21 ] @
Hvala, mislim da cu uspjeti prilagoditi ovo.
[ Eurora3D Team @ 27.06.2008. 15:20 ] @
Evo ti kod sa parsiranjem, ... nije savrsen ali radi
Podesen je da parsira ovu stranu
Code:
Option Explicit
Private Const INTERNET_OPEN_TYPE_PRECONFIG = 0
Private Const INTERNET_FLAG_RELOAD = &H80000000
Private Declare Function InternetOpen Lib "wininet.dll" Alias "InternetOpenA" (ByVal sAgent As String, ByVal lAccessType As Long, ByVal sProxyName As String, ByVal sProxyBypass As String, ByVal lFlags As Long) As Long
Private Declare Function InternetOpenUrl Lib "wininet.dll" Alias "InternetOpenUrlA" (ByVal pub_lngInternetSession As Long, ByVal lpszUrl As String, ByVal lpszHeaders As String, ByVal dwHeadersLength As Long, ByVal dwFlags As Long, ByVal dwContext As Long) As Long
Private Declare Function InternetReadFile Lib "wininet.dll" (ByVal hFile As Long, ByVal sBuffer As String, ByVal lNumBytesToRead As Long, lNumberOfBytesRead As Long) As Integer
Private Declare Function InternetCloseHandle Lib "wininet.dll" (ByVal hInet As Long) As Integer
Dim fso, fs
Public Function DownloadFile(URL As String) As String
On Error GoTo Err
Dim hInternetSession As Long
Dim hUrlFile As Long
Dim sReadBuffer As String * 4096 ' 4k odjednom
Dim sBuffer As String
Dim lNumberOfBytesRead As Long
Dim bDoLoop As Boolean
hInternetSession = InternetOpen("FileDownload", INTERNET_OPEN_TYPE_PRECONFIG, vbNullString, vbNullString, 0)
If hInternetSession = 0 Then GoTo Err
hUrlFile = InternetOpenUrl(hInternetSession, URL, vbNullString, 0, INTERNET_FLAG_RELOAD, 0)
If hUrlFile = 0 Then GoTo Err
bDoLoop = True
While bDoLoop
sReadBuffer = ""
bDoLoop = InternetReadFile(hUrlFile, sReadBuffer, _
Len(sReadBuffer), lNumberOfBytesRead)
sBuffer = sBuffer & Left$(sReadBuffer, lNumberOfBytesRead)
If Not CBool(lNumberOfBytesRead) Then bDoLoop = False
Wend
InternetCloseHandle (hUrlFile)
InternetCloseHandle (hInternetSession)
If sBuffer = "" Then GoTo Err
DownloadFile = sBuffer
Exit Function
Err:
DownloadFile = ""
End Function
Sub ParseURLs(HTML As String)
If HTML = "" Then Exit Sub
Dim st As Long, en As Long
st = InStr(1, HTML, "href=" & Chr(34))
While st
st = st + 6
en = InStr(st, HTML, Chr(34))
If en = 0 Then Exit Sub
fs.WriteLine Mid(HTML, st, en - st)
st = InStr(en, HTML, "href=" & Chr(34))
Wend
End Sub
Private Sub Form_Load()
On Error GoTo Err
Set fso = CreateObject("Scripting.FileSystemObject")
Set fs = fso.CreateTextFile("C:\urls.txt", 2) ' tekst fajl
Dim buffer As String
buffer = DownloadFile("http://www.elitesecurity.org/t328044-Pregled-linkova-web-stranice") ' url
ParseURLs (buffer)
fs.Close
Set fs = Nothing
Set fso = Nothing
Err:
End
End Sub
[ milan90 @ 28.06.2008. 08:14 ] @
Ne znam kako, ali meni javlja par gresaka, ove nikako da skontam.
Code:
Public Function DownloadFile(URL As String) As String
On Error GoTo Err
Dim hInternetSession As Long
Dim hUrlFile As Long
Dim sReadBuffer As String * 4096 ' 4k odjednom
Dim sBuffer As String
Dim lNumberOfBytesRead As Long
Dim bDoLoop As Boolean
hInternetSession = InternetOpen("FileDownload", INTERNET_OPEN_TYPE_PRECONFIG, vbNullString, vbNullString, 0)
If hInternetSession = 0 Then GoTo Err
hUrlFile = InternetOpenUrl(hInternetSession, URL, vbNullString, 0, INTERNET_FLAG_RELOAD, 0)
If hUrlFile = 0 Then GoTo Err
bDoLoop = True
While bDoLoop
sReadBuffer = ""
bDoLoop = InternetReadFile(hUrlFile, sReadBuffer, _
Len(sReadBuffer), lNumberOfBytesRead)
sBuffer = sBuffer & [blue]Left$[/blue] (sReadBuffer, lNumberOfBytesRead)
If Not CBool(lNumberOfBytesRead) Then bDoLoop = False
Wend
InternetCloseHandle (hUrlFile)
InternetCloseHandle (hInternetSession)
If sBuffer = "" Then GoTo Err
DownloadFile = sBuffer
Exit Function
Err:
DownloadFile = ""
End Function
Sub ParseURLs(HTML As String)
If HTML = "" Then Exit Sub
Dim st As Long, en As Long
st = InStr(1, HTML, "href=" & Chr(34))
While st
st = st + 6
en = InStr(st, HTML, Chr(34))
If en = 0 Then Exit Sub
fs.WriteLine Mid(HTML, st, en - st)
st = [blue]InStr(en, HTML, "href=" & Chr(34))[/blue]
Wend
End Sub
1.'Public Property Left() As Integer' has no parameters and its return type cannot be indexed.
2. Type character '$' does not match declared data type 'Integer'.
3.Overload resolution failed because no accessible 'InStr' can be called without a narrowing conversion:
'Public Function InStr(Start As Integer, String1 As String, String2 As String, [Compare As Microsoft.VisualBasic.CompareMethod = Microsoft.VisualBasic.CompareMethod.Binary]) As Integer': Argument matching parameter 'Start' narrows from 'Long' to 'Integer'.
'Public Function InStr(String1 As String, String2 As String, [Compare As Microsoft.VisualBasic.CompareMethod = Microsoft.VisualBasic.CompareMethod.Binary]) As Integer': Argument matching parameter 'String1' narrows from 'Long' to 'String'.
'Public Function InStr(String1 As String, String2 As String, [Compare As Microsoft.VisualBasic.CompareMethod = Microsoft.VisualBasic.CompareMethod.Binary]) As Integer': Argument matching parameter 'Compare' narrows from 'String' to 'Microsoft.VisualBasic.CompareMethod'.
[Ovu poruku je menjao Aleksandar Ružičić dana 28.06.2008. u 12:12 GMT+1]
[ Aleksandar Ružičić @ 28.06.2008. 11:14 ] @
pa sto ne rece da radis u vb.net? :)
ovo je forum za Visual Basic 6
[ milan90 @ 28.06.2008. 16:10 ] @
Pa zapravo mi je svejedno! Prvo sam i mislio da bude u VB6, ali eto..
[ Eurora3D Team @ 29.06.2008. 03:25 ] @
Radi to i u NETu, Samo ga snimis iz VB6, otvoris NET i uradis Upgade
Procitaj malo dokumentaciju ...
Code:
Option Strict Off
Option Explicit On
Imports VB = Microsoft.VisualBasic
Friend Class Form1
Inherits System.Windows.Forms.Form
Private Const INTERNET_OPEN_TYPE_PRECONFIG As Short = 0
Private Const INTERNET_FLAG_RELOAD As Integer = &H80000000
Private Declare Function InternetOpen Lib "wininet.dll" Alias "InternetOpenA"(ByVal sAgent As String, ByVal lAccessType As Integer, ByVal sProxyName As String, ByVal sProxyBypass As String, ByVal lFlags As Integer) As Integer
Private Declare Function InternetOpenUrl Lib "wininet.dll" Alias "InternetOpenUrlA"(ByVal pub_lngInternetSession As Integer, ByVal lpszUrl As String, ByVal lpszHeaders As String, ByVal dwHeadersLength As Integer, ByVal dwFlags As Integer, ByVal dwContext As Integer) As Integer
Private Declare Function InternetReadFile Lib "wininet.dll" (ByVal hFile As Integer, ByVal sBuffer As String, ByVal lNumBytesToRead As Integer, ByRef lNumberOfBytesRead As Integer) As Short
Private Declare Function InternetCloseHandle Lib "wininet.dll" (ByVal hInet As Integer) As Short
Dim fso, fs As Object
Public Function DownloadFile(ByRef URL As String) As String
On Error GoTo Err_Renamed
Dim hInternetSession As Integer
Dim hUrlFile As Integer
Dim sReadBuffer As New VB6.FixedLengthString(4096) ' 4k odjednom
Dim sBuffer As String
Dim lNumberOfBytesRead As Integer
Dim bDoLoop As Boolean
hInternetSession = InternetOpen("FileDownload", INTERNET_OPEN_TYPE_PRECONFIG, vbNullString, vbNullString, 0)
If hInternetSession = 0 Then GoTo Err_Renamed
hUrlFile = InternetOpenUrl(hInternetSession, URL, vbNullString, 0, INTERNET_FLAG_RELOAD, 0)
If hUrlFile = 0 Then GoTo Err_Renamed
bDoLoop = True
While bDoLoop
sReadBuffer.Value = ""
bDoLoop = InternetReadFile(hUrlFile, sReadBuffer.Value, Len(sReadBuffer.Value), lNumberOfBytesRead)
sBuffer = sBuffer & VB.Left(sReadBuffer.Value, lNumberOfBytesRead)
If Not CBool(lNumberOfBytesRead) Then bDoLoop = False
End While
InternetCloseHandle(hUrlFile)
InternetCloseHandle(hInternetSession)
If sBuffer = "" Then GoTo Err_Renamed
DownloadFile = sBuffer
Exit Function
Err_Renamed:
DownloadFile = ""
End Function
Sub ParseURLs(ByRef HTML As String)
If HTML = "" Then Exit Sub
Dim st, en As Integer
st = InStr(1, HTML, "href=" & Chr(34))
While st
st = st + 6
en = InStr(st, HTML, Chr(34))
If en = 0 Then Exit Sub
fs.WriteLine(Mid(HTML, st, en - st))
st = InStr(en, HTML, "href=" & Chr(34))
End While
End Sub
Private Sub Form1_Load(ByVal eventSender As System.Object, ByVal eventArgs As System.EventArgs) Handles MyBase.Load
On Error GoTo Err_Renamed
fso = CreateObject("Scripting.FileSystemObject")
fs = fso.CreateTextFile("C:\urls.txt", 2) ' tekst fajl
Dim buffer As String
buffer = DownloadFile("http://www.elitesecurity.org/t328044-Pregled-linkova-web-stranice") ' url
ParseURLs((buffer))
fs.Close()
fs = Nothing
fso = Nothing
Err_Renamed:
End
End Sub
End Class
[ milan90 @ 29.06.2008. 10:09 ] @
Ma znam, samo sam ja napravio jednu VELIKU glupost, ali radi sada sve. Hvala na pomoci!
Copyright (C) 2001-2025 by www.elitesecurity.org. All rights reserved.
|