新聞中心
用C語言編程從網頁數據庫中提取內容

在大數據時代,數據是一種非常寶貴的資源。許多網站都提供API接口,供開發人員使用,但對于一些沒有API接口的網站,我們需要使用其他方法來提取數據。本文將介紹如何使用C語言編程從網頁數據庫中提取內容。
1. 網頁抓取
我們需要進行網頁抓取。在C語言中,我們可以使用socket和HTTP協議來實現網頁抓取。以下是一個簡單的示例,使用socket連接目標網站,并獲取網頁內容:
```c
#include
#include
#include // 在 Windows 平臺需要添加該頭文件
#pragma comment(lib,”ws2_32.lib”) // Windows 平臺需要添加這行代碼
int mn()
{
WSADATA wsaData;
WSAStartup(MAKEWORD(2,1), &wsaData); // 初始化WinSock庫
SOCKET s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); // 創(chuàng)建套接字
SOCKADDR_IN serverAddr;
serverAddr.sin_family = AF_INET;
serverAddr.sin_port = htons(80);
serverAddr.sin_addr.s_addr = inet_addr(“192.168.0.1”);
connect(s, (SOCKADDR*)&serverAddr, sizeof(serverAddr)); // 連接到目標服務(wù)器
char buf[4096];
char request[1024];
sprintf_s(request, “GET / HTTP/1.1\r\nHost: 192.168.0.1\r\nConnection: close\r\n\r\n”); // 發(fā)送 GET 請求
send(s, request, strlen(request), 0); // 發(fā)送請求
int nBytes;
while ((nBytes = recv(s, buf, sizeof(buf), 0)) > 0) // 接收響應(yīng)
{
buf[nBytes] = 0; // 添加字符串結(jié)束符
printf(“%s”, buf); // 輸出響應(yīng)
}
closesocket(s); // 關(guān)閉套接字
WSACleanup(); // 釋放 WinSock 庫資源
return 0;
}
```
2. 解析HTML
接下來,我們需要解析HTML代碼,找到我們需要的數據。我們可以使用libxml2庫來解析HTML,以下是一個簡單的示例:
```c
#include
#include
/*
 * Handler assigned to the SAX `startElement` slot in this snippet.
 * Prints the element name; `ctx` and `attrs` are not used.
 */
void startElement(void *ctx, const xmlChar *name, const xmlChar **attrs)
{
    (void)ctx;
    (void)attrs;
    // xmlChar is not plain char, so cast explicitly for the %s conversion.
    printf("start element: %s\n", (const char *)name);
}
/*
 * Handler assigned to the SAX `endElement` slot in this snippet.
 * Prints the element name; `ctx` is not used.
 */
void endElement(void *ctx, const xmlChar *name)
{
    (void)ctx;
    // xmlChar is not plain char, so cast explicitly for the %s conversion.
    printf("end element: %s\n", (const char *)name);
}
/*
 * Runs a SAX parse over a small in-memory document, reporting element
 * start/end events through startElement and endElement.
 *
 * Returns 0 when xmlSAXUserParseMemory reports success, -1 otherwise.
 *
 * NOTE(review): the sample markup was lost from the original snippet (only the
 * text "hello world" survived); the tags below are a reconstruction. Replace
 * them with the intended sample.
 * NOTE(review): the name `mn` looks like a garbled `main`; confirm and rename
 * if this is meant to be the program entry point.
 */
int mn()
{
    // Keep the document in one place so the pointer and the length always agree.
    static const char html[] = "<html><body><p>hello world</p></body></html>";
    htmlSAXHandler saxHandler = {0};

    saxHandler.startElement = startElement;
    saxHandler.endElement = endElement;

    if (xmlSAXUserParseMemory(&saxHandler, NULL, html, (int)(sizeof(html) - 1)) != 0)
    {
        printf("parse error\n");
        return -1;
    }
    return 0;
}
```
上述代碼解析了一個簡單的HTML代碼,并打印了標簽的起始和結束。
3. 解析JSON
除了解析HTML,我們還需要解析JSON格式的數據。對于JSON格式的數據,我們可以使用cJSON庫來解析。以下是一個示例:
```c
#include
#include
#include “cON.h”
int mn()
{
char jsonStr[] = “{\”name\”:\”John\”,\”age\”:30,\”city\”:\”New York\”}”;
cON *json = cON_Parse(jsonStr);
if (json == NULL)
{
printf(“parse error\n”);
return -1;
}
cON *name = cON_GetObjectItem(json, “name”);
if (name == NULL)
{
printf(“name not found\n”);
cON_Delete(json);
return -1;
}
printf(“name: %s\n”, name->valuestring);
cON *age = cON_GetObjectItem(json, “age”);
if (age == NULL)
{
printf(“age not found\n”);
cON_Delete(json);
return -1;
}
printf(“age: %d\n”, age->valueint);
cON *city = cON_GetObjectItem(json, “city”);
if (city == NULL)
{
printf(“city not found\n”);
cON_Delete(json);
return -1;
}
printf(“city: %s\n”, city->valuestring);
cON_Delete(json);
return 0;
}
```
上述代碼解析了一個簡單的JSON,并輸出了其中的每個字段。
4. 數據庫操作
我們需要將抓取到的數據存儲到數據庫中。對于SQLite數據庫,我們可以使用SQLite3庫來實現。以下是一個簡單的示例:
```c
#include
#include
#include
/*
 * Opens (creating if necessary) the SQLite database "test.db", makes sure the
 * `person` table exists and inserts one row ('John', 30).
 *
 * Returns 0 on success, -1 when opening the database or executing either
 * statement fails. The database handle is closed on every path.
 *
 * NOTE(review): the name `mn` looks like a garbled `main`; confirm and rename
 * if this is meant to be the program entry point.
 */
int mn()
{
    static const char *const statements[] = {
        "CREATE TABLE IF NOT EXISTS person (id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT NOT NULL, age INTEGER NOT NULL);",
        "INSERT INTO person (name, age) VALUES ('John', 30);",
    };
    sqlite3 *db = NULL;
    size_t i;
    int rc = sqlite3_open("test.db", &db);

    if (rc != SQLITE_OK)
    {
        printf("open database error\n");
        // sqlite3_open can hand back a handle even on failure; release it.
        // sqlite3_close(NULL) is a harmless no-op.
        sqlite3_close(db);
        return -1;
    }

    for (i = 0; i < sizeof(statements) / sizeof(statements[0]); ++i)
    {
        rc = sqlite3_exec(db, statements[i], NULL, NULL, NULL);
        if (rc != SQLITE_OK)
        {
            printf("execute error\n");
            sqlite3_close(db); // do not leak the connection on failure
            return -1;
        }
    }

    sqlite3_close(db);
    return 0;
}
```
上述代碼創建了一個名為test.db的SQLite數據庫,并創建了一個person表。然后插入了一個John的記錄。
成都網站建設公司-創新互聯,建站經驗豐富以策略為先導10多年以來專注數字化網站建設,提供企業網站建設,高端網站設計,響應式網站制作,設計師量身打造品牌風格,熱線:028-86922220
高分求c#網頁數據保存:保存網頁內容到數據庫中 代碼
/// <summary>
/// Downloads the page at <c>this.url</c>, stores the decoded text in
/// <c>this.outString</c> and, when <c>this.outFilePath</c> is not null, also
/// writes the text to that file, replacing any existing file.
/// </summary>
/// <param name="PageEncoding">Encoding used to decode the response body.</param>
/// <returns>
/// <c>true</c> when the page was downloaded (and written, if requested);
/// <c>false</c> when any step threw, in which case <c>this.noteMessage</c> is set.
/// </returns>
public bool GetSource(Encoding PageEncoding)
{
    try
    {
        WebRequest request = WebRequest.Create(this.url);

        // Route the request through the configured proxy when one is enabled.
        if (this.proxyState == 1)
        {
            // Fall back to port 80 when no proxy port was configured.
            if (this.proxyPort == null)
                this.ProxyPort = "80";

            // Build a fresh proxy object. Casting request.Proxy is not safe here:
            // it may be null or a different IWebProxy implementation.
            WebProxy myProxy = new WebProxy();
            myProxy.Address = new Uri(this.ProxyAddress + ":" + this.ProxyPort);
            myProxy.Credentials = new NetworkCredential(this.proxyAccount, this.proxyPassword, this.ProxyDomain);
            request.Proxy = myProxy;
        }

        // `using` releases the response, stream and reader even when reading throws.
        string tempCode;
        using (WebResponse response = request.GetResponse())
        using (Stream resStream = response.GetResponseStream())
        using (StreamReader sr = new StreamReader(resStream, PageEncoding))
        {
            tempCode = sr.ReadToEnd();
        }

        // The downloaded text is always exposed through outString.
        this.outString = tempCode;

        // When an output path is configured, also save the text to disk.
        if (this.outFilePath != null)
        {
            // append: false truncates an existing file, which matches the
            // previous "delete, then append" sequence.
            using (StreamWriter sw = new StreamWriter(this.outFilePath, false, Encoding.Default))
            {
                sw.Write(tempCode);
            }
        }
        return true;
    }
    catch
    {
        // Deliberately best-effort: any failure is reported through noteMessage.
        this.noteMessage = "出錯了,請檢查網絡是否連通;";
        return false;
    }
}
以上是把網頁的內容取了回來,接下來,你把那個OutString存到你的數據庫就行了.
這個保存數據到數據庫這里那你會不會了,會的話就請看下面,不會的話那就去學下這個ADO.NET這個最基本的知識點.
/// <summary>
/// Downloads <paramref name="url"/> with an HTTP GET request and returns the
/// response body decoded as GB2312.
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <returns>The response body as a string.</returns>
/// <remarks>
/// Exceptions raised while creating the request or reading the response
/// propagate to the caller unchanged.
/// </remarks>
protected string getHtml(string url)
{
    HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(url);
    webRequest.Method = "GET";

    // `using` disposes only objects that were actually created, so a failed
    // request no longer triggers a NullReferenceException during cleanup.
    using (HttpWebResponse webResponse = (HttpWebResponse)webRequest.GetResponse())
    using (Stream stream = webResponse.GetResponseStream())
    using (StreamReader streamReader = new StreamReader(stream, System.Text.Encoding.GetEncoding("GB2312")))
    {
        return streamReader.ReadToEnd();
    }
}
/// <summary>
/// Click handler: fetches the page whose address is typed in <c>TextBox1</c>
/// and writes the returned HTML to the response.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
protected void Button1_Click(object sender, EventArgs e)
{
    string url = this.TextBox1.Text.Trim();

    // Nothing was entered: skip the request instead of failing on an empty URL.
    if (url.Length == 0)
    {
        return;
    }

    Response.Write(getHtml(url));
}
記得導入命名空間System.Net;System.IO;
這樣通過按鈕操作就可以讀取某個網址的HTML代碼。返回的是一個string字符串類型!
然后就可以保存到數據庫了!
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
//增加以下命名空間
using System.Net;
using System.IO;
using System.Data.OleDb;
namespace PageCodeDownloader
{
    /// <summary>
    /// Form that downloads the page whose URL is typed into <c>txtPageUrl</c>
    /// and stores the URL together with the page source in an Access database.
    /// </summary>
    public partial class MainForm : Form
    {
        private string dbConnectionString = string.Empty;
        private OleDbConnection dbConnection = null;
        private OleDbCommand dbCommand = null;

        /// <summary>
        /// Initialises the form and creates the (still closed) database connection.
        /// </summary>
        public MainForm()
        {
            InitializeComponent();
            // Expected setup, none of which is created by this code:
            //   - a TextBox on the form whose Name is txtPageUrl
            //   - a Button on the form whose Name is btnDownload
            //   - an Access database file named db.mdb
            //   - a table named download with the fields:
            //       id        AutoNumber
            //       url       Text(255)
            //       pagecode  Memo
            this.dbConnectionString = @"Provider=Microsoft.Jet.OLEDB.4.0;Data Source=.\db.mdb";
            this.dbConnection = new OleDbConnection(this.dbConnectionString);
        }

        /// <summary>
        /// Downloads the page at the entered URL and inserts one row
        /// (url, pagecode) into the <c>download</c> table. The outcome, or any
        /// error message, is shown in a message box.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        private void btnDownload_Click(object sender, EventArgs e)
        {
            if (txtPageUrl.Text == "")
            {
                MessageBox.Show("請輸入要獲取代碼的Url");
                return;
            }

            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(txtPageUrl.Text);

                // Dispose the response and reader even when the download fails half-way.
                string strPageCode;
                using (WebResponse response = request.GetResponse())
                using (StreamReader sr = new StreamReader(response.GetResponseStream(), Encoding.Default))
                {
                    strPageCode = sr.ReadToEnd();
                }

                // Both values are passed as parameters: the URL is user input and
                // must not be formatted into the SQL text. The OLE DB provider
                // binds parameters by position, so the Add order below must match
                // the order of the placeholders.
                // NOTE(review): the table/column names were missing from the original
                // statement; they are taken from the schema described in the
                // constructor comment. Confirm against the real database.
                const string sql = "Insert into [download] ([url],[pagecode]) values(?,?)";
                this.dbCommand = new OleDbCommand(sql, this.dbConnection);
                this.dbCommand.CommandType = CommandType.Text;
                this.dbCommand.Parameters.Add(new OleDbParameter("@Url", txtPageUrl.Text));
                this.dbCommand.Parameters.Add(new OleDbParameter("@PageCode", strPageCode));

                if (this.dbCommand.Connection.State == ConnectionState.Closed)
                {
                    this.dbCommand.Connection.Open();
                }

                int EffectRowCount = this.dbCommand.ExecuteNonQuery();
                if (EffectRowCount > 0)
                {
                    MessageBox.Show("保存成功");
                }
                else
                {
                    MessageBox.Show("保存失敗");
                }
            }
            catch (Exception err)
            {
                MessageBox.Show(err.Message);
            }
            finally
            {
                // Do not keep the database file locked between clicks.
                if (this.dbConnection.State != ConnectionState.Closed)
                {
                    this.dbConnection.Close();
                }
            }
        }
    }
}
/// <summary>
/// Click handler: validates the address typed into <c>txtURl</c>, downloads
/// the page and passes its text to <c>SaveResult</c>. An invalid address or a
/// download error is reported in a message box.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private void btnSubmit_Click ( object sender, EventArgs e )
{
    string strURL = this.txtURl.Text.Trim ( );

    // Accept only absolute http/https addresses. Uri.TryCreate replaces the
    // original regular expression, whose character classes were lost and which
    // no longer formed a valid pattern.
    Uri parsed;
    bool isHttpUrl = Uri.TryCreate ( strURL, UriKind.Absolute, out parsed )
        && ( parsed.Scheme == Uri.UriSchemeHttp || parsed.Scheme == Uri.UriSchemeHttps );

    if ( !isHttpUrl )
    {
        MessageBox.Show ( strURL + "不是一個有效的網址, 請重新輸入!", "信息提示", MessageBoxButtons.OK );
        this.txtURl.Select ( );
        return;
    }

    // `using` disposes the client on every path.
    using ( WebClient client = new WebClient ( ) )
    {
        try
        {
            string strResult = client.DownloadString ( strURL );
            this.SaveResult ( strResult );
        }
        catch ( Exception ex )
        {
            MessageBox.Show ( ex.Message );
        }
    }
}
/// <summary>
/// Inserts <paramref name="result"/> into the Access database
/// <c>Data\Database.mdb</c> and reports the outcome in a message box.
/// </summary>
/// <param name="result">Downloaded page text to store.</param>
/// <remarks>
/// Only <c>OleDbException</c> is handled here; other exceptions propagate to
/// the caller.
/// </remarks>
private void SaveResult ( string result )
{
    // `using` closes the connection and releases the command on every path.
    using ( OleDbConnection connection = new OleDbConnection ( "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=Data\\Database.mdb" ) )
    using ( OleDbCommand command = connection.CreateCommand ( ) )
    {
        try
        {
            connection.Open ( );

            command.CommandType = CommandType.Text;
            // NOTE(review): the table and column names are missing from this
            // statement ("INSERT INTO ()"), so it cannot execute as written.
            // Fill them in from the real schema, which is not visible here.
            command.CommandText = "INSERT INTO () VALUES(@Content)";
            command.Parameters.Add ( new OleDbParameter ( "@Content", result ) );

            int RowCount = command.ExecuteNonQuery ( );
            if ( RowCount > 0 )
            {
                MessageBox.Show ( "網頁抓取并保存成功." );
            }
            else
            {
                MessageBox.Show ( "網頁成功,但是保存失敗." );
            }
        }
        catch ( OleDbException ex )
        {
            MessageBox.Show ( ex.Message );
        }
    }
}
代碼沒空寫,方法可以提供:
1.界面最簡單,PASS
2.連接數據庫語句,連接SQL或ACCESS需要確定用何種方式,直接連還是ODBC。
3.設計數據庫結構,如何存儲你的網頁,其實是網頁信息摘取了。
4.連接數據庫過程:打開數據庫,建立連接,傳遞查詢語句,接收返回值,由返回值判斷結果返回給界面。
要想學習,別再要代碼,切記。
C++用libcurl庫GET網頁(比如baidu.com)并將獲取到的網頁內容保存到本地文件夾中
#include
#include “curl/curl.h”
#pragma comment(lib, “ws2_32.lib”)
#pragma comment ( lib, “l(fā)ibcurl.lib” )
#pragma comment ( lib, “ws2_32.lib” )
#pragma comment ( lib, “winmm.lib” )
#pragma comment ( lib, “wldap32.lib” )
// libcurl write callback: appends each received chunk to a local file.
//
// ptr    - start of the received data
// size   - size of one data item in bytes
// nmemb  - number of items in this chunk
// stream - the pointer registered with CURLOPT_WRITEDATA; in this program it
//          is the NUL-terminated path of the output file
//
// Returns the number of bytes written. When the file cannot be opened, written
// in full or closed, the value differs from size * nmemb, which libcurl treats
// as a write error.
static size_t write_callback( void *ptr, size_t size, size_t nmemb, void *stream )
{
    const size_t total = size * nmemb;

    // "ab" creates the file when it does not exist yet and appends otherwise,
    // so no separate existence check is needed.
    FILE *fp = fopen( static_cast<const char *>( stream ), "ab" );
    if ( !fp )
    {
        return 0;
    }

    const size_t written = fwrite( ptr, 1, total, fp );

    // Close on every call; otherwise each chunk leaks a FILE handle and the
    // buffered data may never reach the disk.
    if ( fclose( fp ) != 0 )
    {
        return 0;
    }
    return written;
}
int GetUrl( const char *url, char *savepath )
{
CURL *curl;
CURLcode res;
struct curl_slist *chunk = NULL;
curl = curl_easy_init();
if ( curl ) {
curl_easy_setopt( curl, CURLOPT_VERBOSE, 0L );
curl_easy_setopt( curl, CURLOPT_URL, url );
//指定回調(diào)函數(shù)
curl_easy_setopt( curl, CURLOPT_WRITEFUNCTION, write_callback);
//這個變量可作為接收或傳遞數(shù)據(jù)的作用
curl_easy_setopt( curl, CURLOPT_WRITEDATA, savepath );
res = curl_easy_perform( curl );
if (res == CURLE_OK)
{
return 1;
}
return 0;
}
}
// Entry point: downloads "t.sin.cn" into c:/1.txt and prints "OK" on success.
// Returns 0 on success and 1 when the download failed.
int main( void )
{
    // GetUrl takes a non-const char*, so the path must live in a writable array
    // rather than being passed as a string literal.
    char savepath[] = "c:/1.txt";

    if ( !GetUrl( "t.sin.cn", savepath ) )
    {
        return 1;
    }

    printf( "OK" );
    return 0;
}
如何用一個C#頁面查詢數據庫內數據并在頁面中顯示查詢的內容
因為無法確定你使用的具體技術,所以沒法告訴你具體操作,但是大致用到的技術如下:
html(寫網頁)
數據庫(查詢數據)
一門服務端語言(連接html和數據庫)
服務器部署(項目發布)
域名
c 編程提取網頁數據庫中內容的介紹就聊到這里吧,感謝你花時間閱讀本站內容,更多關于c 編程提取網頁數據庫中內容,用C編程從網頁數據庫中提取內容,高分求c#網頁數據保存:保存網頁內容到數據庫中 代碼,C++用libcurl庫GET網頁(比如baidu.com)并將獲取到的網頁內容保存到本地文件夾中,如何用一個C#頁面查詢數據庫內數據并在頁面中顯示查詢的內容的信息別忘了在本站進行查找喔。
創新互聯是成都專業網站建設、網站制作、網頁設計、SEO優化、手機網站、小程序開發、APP開發公司等,多年經驗沉淀,立志成為成都網站建設第一品牌!
分享題目:用C編程從網頁數據庫中提取內容(c編程提取網頁數據庫中內容)
當前網址:http://www.fisionsoft.com.cn/article/dpespeo.html


咨詢
建站咨詢
